diff --git a/EXTERNAL_HEADERS/Makefile b/EXTERNAL_HEADERS/Makefile index 0e05710a4..fceef154e 100644 --- a/EXTERNAL_HEADERS/Makefile +++ b/EXTERNAL_HEADERS/Makefile @@ -23,6 +23,7 @@ INSTINC_SUBDIRS_I386 = \ EXPORT_FILES = \ ar.h \ stdarg.h \ + stdbool.h \ stdint.h INSTALL_MI_LIST = diff --git a/EXTERNAL_HEADERS/architecture/Makefile b/EXTERNAL_HEADERS/architecture/Makefile index fd64ab197..41a4fa6d4 100644 --- a/EXTERNAL_HEADERS/architecture/Makefile +++ b/EXTERNAL_HEADERS/architecture/Makefile @@ -15,8 +15,7 @@ INSTINC_SUBDIRS_PPC = \ INSTINC_SUBDIRS_I386 = \ i386 -EXPORT_FILES = \ - byte_order.h +EXPORT_FILES = INSTALL_MI_LIST = @@ -30,3 +29,6 @@ include $(MakeInc_rule) include $(MakeInc_dir) +build_installhdrs_mi:: + $(MKDIR) $(DSTROOT)/$(KINCDIR)/$(EXPORT_MI_DIR) + $(LN) ../../../../../../../../usr/include/architecture/byte_order.h $(DSTROOT)/$(KINCDIR)/$(EXPORT_MI_DIR)/byte_order.h diff --git a/EXTERNAL_HEADERS/architecture/byte_order.h b/EXTERNAL_HEADERS/architecture/byte_order.h deleted file mode 100644 index fe80ee110..000000000 --- a/EXTERNAL_HEADERS/architecture/byte_order.h +++ /dev/null @@ -1,525 +0,0 @@ -/* - * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. - * - * @APPLE_LICENSE_HEADER_START@ - * - * The contents of this file constitute Original Code as defined in and - * are subject to the Apple Public Source License Version 1.1 (the - * "License"). You may not use this file except in compliance with the - * License. Please obtain a copy of the License at - * http://www.apple.com/publicsource and read it before using this file. - * - * This Original Code and all software distributed under the License are - * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. 
Please see the - * License for the specific language governing rights and limitations - * under the License. - * - * @APPLE_LICENSE_HEADER_END@ - */ -/* - * Copyright (c) 1992 NeXT Computer, Inc. - * - * Byte ordering conversion. - */ - -#ifndef _ARCHITECTURE_BYTE_ORDER_H_ -#define _ARCHITECTURE_BYTE_ORDER_H_ - -typedef unsigned long NXSwappedFloat; -typedef unsigned long long NXSwappedDouble; - -#if defined (__ppc__) || defined(__ppc64__) -#include "architecture/ppc/byte_order.h" -#elif defined (__i386__) -#include "architecture/i386/byte_order.h" -#else -#error architecture not supported -#endif - -/* - * Identify the byte order - * of the current host. - */ - -enum NXByteOrder { - NX_UnknownByteOrder, - NX_LittleEndian, - NX_BigEndian -}; - -static __inline__ -enum NXByteOrder -NXHostByteOrder(void) -{ - unsigned int _x; - - _x = (NX_BigEndian << 24) | NX_LittleEndian; - - return ((enum NXByteOrder)*((unsigned char *)&_x)); -} - -/* - * The predicated versions - * are defined here in terms - * of the unpredicated ones. 
- */ - -#if __BIG_ENDIAN__ - -static __inline__ -unsigned short -NXSwapBigShortToHost( - unsigned short x -) -{ - return (x); -} - -static __inline__ -unsigned int -NXSwapBigIntToHost( - unsigned int x -) -{ - return (x); -} - -static __inline__ -unsigned long -NXSwapBigLongToHost( - unsigned long x -) -{ - return (x); -} - -static __inline__ -unsigned long long -NXSwapBigLongLongToHost( - unsigned long long x -) -{ - return (x); -} - -#ifndef KERNEL - -static __inline__ -double -NXSwapBigDoubleToHost( - NXSwappedDouble x -) -{ - return NXConvertSwappedDoubleToHost(x); -} - -static __inline__ -float -NXSwapBigFloatToHost( - NXSwappedFloat x -) -{ - return NXConvertSwappedFloatToHost(x); -} - -#endif /* KERNEL */ - -static __inline__ -unsigned short -NXSwapHostShortToBig( - unsigned short x -) -{ - return (x); -} - -static __inline__ -unsigned int -NXSwapHostIntToBig( - unsigned int x -) -{ - return (x); -} - -static __inline__ -unsigned long -NXSwapHostLongToBig( - unsigned long x -) -{ - return (x); -} - -static __inline__ -unsigned long long -NXSwapHostLongLongToBig( - unsigned long long x -) -{ - return (x); -} - -#ifndef KERNEL - -static __inline__ -NXSwappedDouble -NXSwapHostDoubleToBig( - double x -) -{ - return NXConvertHostDoubleToSwapped(x); -} - -static __inline__ -NXSwappedFloat -NXSwapHostFloatToBig( - float x -) -{ - return NXConvertHostFloatToSwapped(x); -} - -#endif /* KERNEL */ - -static __inline__ -unsigned short -NXSwapLittleShortToHost( - unsigned short x -) -{ - return (NXSwapShort(x)); -} - -static __inline__ -unsigned int -NXSwapLittleIntToHost( - unsigned int x -) -{ - return (NXSwapInt(x)); -} - -static __inline__ -unsigned long -NXSwapLittleLongToHost( - unsigned long x -) -{ - return (NXSwapLong(x)); -} - -static __inline__ -unsigned long long -NXSwapLittleLongLongToHost( - unsigned long long x -) -{ - return (NXSwapLongLong(x)); -} - -#ifndef KERNEL - -static __inline__ -double -NXSwapLittleDoubleToHost( - NXSwappedDouble x -) -{ - return 
NXConvertSwappedDoubleToHost(NXSwapDouble(x)); -} - -static __inline__ -float -NXSwapLittleFloatToHost( - NXSwappedFloat x -) -{ - return NXConvertSwappedFloatToHost(NXSwapFloat(x)); -} - -#endif /* KERNEL */ - -static __inline__ -unsigned short -NXSwapHostShortToLittle( - unsigned short x -) -{ - return (NXSwapShort(x)); -} - -static __inline__ -unsigned int -NXSwapHostIntToLittle( - unsigned int x -) -{ - return (NXSwapInt(x)); -} - -static __inline__ -unsigned long -NXSwapHostLongToLittle( - unsigned long x -) -{ - return (NXSwapLong(x)); -} - -static __inline__ -unsigned long long -NXSwapHostLongLongToLittle( - unsigned long long x -) -{ - return (NXSwapLongLong(x)); -} - -#ifndef KERNEL - -static __inline__ -NXSwappedDouble -NXSwapHostDoubleToLittle( - double x -) -{ - return NXSwapDouble(NXConvertHostDoubleToSwapped(x)); -} - -static __inline__ -NXSwappedFloat -NXSwapHostFloatToLittle( - float x -) -{ - return NXSwapFloat(NXConvertHostFloatToSwapped(x)); -} - -#endif /* KERNEL */ -#endif /*__BIG_ENDIAN__ */ - -#if __LITTLE_ENDIAN__ - -static __inline__ -unsigned short -NXSwapBigShortToHost( - unsigned short x -) -{ - return (NXSwapShort(x)); -} - -static __inline__ -unsigned int -NXSwapBigIntToHost( - unsigned int x -) -{ - return (NXSwapInt(x)); -} - -static __inline__ -unsigned long -NXSwapBigLongToHost( - unsigned long x -) -{ - return (NXSwapLong(x)); -} - -static __inline__ -unsigned long long -NXSwapBigLongLongToHost( - unsigned long long x -) -{ - return (NXSwapLongLong(x)); -} - -static __inline__ -double -NXSwapBigDoubleToHost( - NXSwappedDouble x -) -{ - return NXConvertSwappedDoubleToHost(NXSwapDouble(x)); -} - -static __inline__ -float -NXSwapBigFloatToHost( - NXSwappedFloat x -) -{ - return NXConvertSwappedFloatToHost(NXSwapFloat(x)); -} - -static __inline__ -unsigned short -NXSwapHostShortToBig( - unsigned short x -) -{ - return (NXSwapShort(x)); -} - -static __inline__ -unsigned int -NXSwapHostIntToBig( - unsigned int x -) -{ - return 
(NXSwapInt(x)); -} - -static __inline__ -unsigned long -NXSwapHostLongToBig( - unsigned long x -) -{ - return (NXSwapLong(x)); -} - -static __inline__ -unsigned long long -NXSwapHostLongLongToBig( - unsigned long long x -) -{ - return (NXSwapLongLong(x)); -} - -static __inline__ -NXSwappedDouble -NXSwapHostDoubleToBig( - double x -) -{ - return (NXSwapDouble(NXConvertHostDoubleToSwapped(x))); -} - -static __inline__ -NXSwappedFloat -NXSwapHostFloatToBig( - float x -) -{ - return (NXSwapFloat(NXConvertHostFloatToSwapped(x))); -} - -static __inline__ -unsigned short -NXSwapLittleShortToHost( - unsigned short x -) -{ - return (x); -} - -static __inline__ -unsigned int -NXSwapLittleIntToHost( - unsigned int x -) -{ - return (x); -} - -static __inline__ -unsigned long -NXSwapLittleLongToHost( - unsigned long x -) -{ - return (x); -} - -static __inline__ -unsigned long long -NXSwapLittleLongLongToHost( - unsigned long long x -) -{ - return (x); -} - -static __inline__ -double -NXSwapLittleDoubleToHost( - NXSwappedDouble x -) -{ - return NXConvertSwappedDoubleToHost(x); -} - -static __inline__ -float -NXSwapLittleFloatToHost( - NXSwappedFloat x -) -{ - return NXConvertSwappedFloatToHost(x); -} - -static __inline__ -unsigned short -NXSwapHostShortToLittle( - unsigned short x -) -{ - return (x); -} - -static __inline__ -unsigned int -NXSwapHostIntToLittle( - unsigned int x -) -{ - return (x); -} - -static __inline__ -unsigned long -NXSwapHostLongToLittle( - unsigned long x -) -{ - return (x); -} - -static __inline__ -unsigned long long -NXSwapHostLongLongToLittle( - unsigned long long x -) -{ - return (x); -} - -static __inline__ -NXSwappedDouble -NXSwapHostDoubleToLittle( - double x -) -{ - return NXConvertHostDoubleToSwapped(x); -} - -static __inline__ -NXSwappedFloat -NXSwapHostFloatToLittle( - float x -) -{ - return NXConvertHostFloatToSwapped(x); -} - -#endif /* __LITTLE_ENDIAN__ */ - -#endif /* _ARCHITECTURE_BYTE_ORDER_H_ */ diff --git 
a/EXTERNAL_HEADERS/architecture/i386/Makefile b/EXTERNAL_HEADERS/architecture/i386/Makefile index 5140ed922..5ab63fe3e 100644 --- a/EXTERNAL_HEADERS/architecture/i386/Makefile +++ b/EXTERNAL_HEADERS/architecture/i386/Makefile @@ -12,13 +12,11 @@ INSTINC_SUBDIRS_I386 = EXPORT_FILES = \ asm_help.h \ cpu.h \ - fpu.h \ io.h \ + pio.h \ sel.h \ tss.h \ - byte_order.h \ desc.h \ - frame.h \ reg_help.h \ table.h diff --git a/EXTERNAL_HEADERS/architecture/i386/byte_order.h b/EXTERNAL_HEADERS/architecture/i386/byte_order.h deleted file mode 100644 index 6b1bb5ed9..000000000 --- a/EXTERNAL_HEADERS/architecture/i386/byte_order.h +++ /dev/null @@ -1,148 +0,0 @@ -/* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. - * - * @APPLE_LICENSE_HEADER_START@ - * - * The contents of this file constitute Original Code as defined in and - * are subject to the Apple Public Source License Version 1.1 (the - * "License"). You may not use this file except in compliance with the - * License. Please obtain a copy of the License at - * http://www.apple.com/publicsource and read it before using this file. - * - * This Original Code and all software distributed under the License are - * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the - * License for the specific language governing rights and limitations - * under the License. - * - * @APPLE_LICENSE_HEADER_END@ - */ -/* - * Copyright (c) 1992 NeXT Computer, Inc. - * - * Byte ordering conversion (for i386). - * - * HISTORY - * - * 8 October 1992 ? at NeXT - * Converted to NXxxx versions. Condensed history. - * - * 18 May 1992 ? at NeXT - * Created. 
- */ - -static __inline__ -unsigned short -NXSwapShort( - unsigned short inv -) -{ - register unsigned short value = inv; - - __asm__ volatile( "xchgb %h1, %b1" : "=q" (value) : "0" (value)); - - return (value); -} - -static __inline__ -unsigned long -NXSwapInt( - unsigned long inv -) -{ - register unsigned int outv = inv; - - __asm__ volatile( "bswap %0" : "=r" (outv) : "0" (outv)); - - return (outv); -} - -static __inline__ -unsigned long -NXSwapLong( - unsigned long inv -) -{ - unsigned long outv; - - __asm__ volatile( - "bswap %0" - - : "=r" (outv) - : "0" (inv)); - - return (outv); -} - -static __inline__ -unsigned long long -NXSwapLongLong( - unsigned long long inv -) -{ - union llconv { - unsigned long long ull; - unsigned long ul[2]; - } *inp, outv; - - inp = (union llconv *)&inv; - - outv.ul[0] = NXSwapLong(inp->ul[1]); - outv.ul[1] = NXSwapLong(inp->ul[0]); - - return (outv.ull); -} - -static __inline__ NXSwappedFloat -NXConvertHostFloatToSwapped(float x) -{ - union fconv { - float number; - NXSwappedFloat sf; - }; - return ((union fconv *)&x)->sf; -} - -static __inline__ float -NXConvertSwappedFloatToHost(NXSwappedFloat x) -{ - union fconv { - float number; - NXSwappedFloat sf; - }; - return ((union fconv *)&x)->number; -} - -static __inline__ NXSwappedDouble -NXConvertHostDoubleToSwapped(double x) -{ - union dconv { - double number; - NXSwappedDouble sd; - }; - return ((union dconv *)&x)->sd; -} - -static __inline__ double -NXConvertSwappedDoubleToHost(NXSwappedDouble x) -{ - union dconv { - double number; - NXSwappedDouble sd; - }; - return ((union dconv *)&x)->number; -} - -static __inline__ NXSwappedFloat -NXSwapFloat(NXSwappedFloat x) -{ - return NXSwapLong(x); -} - -static __inline__ NXSwappedDouble -NXSwapDouble(NXSwappedDouble x) -{ - return NXSwapLongLong(x); -} diff --git a/EXTERNAL_HEADERS/architecture/i386/fpu.h b/EXTERNAL_HEADERS/architecture/i386/fpu.h deleted file mode 100644 index a353a12b1..000000000 --- 
a/EXTERNAL_HEADERS/architecture/i386/fpu.h +++ /dev/null @@ -1,152 +0,0 @@ -/* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. - * - * @APPLE_LICENSE_HEADER_START@ - * - * The contents of this file constitute Original Code as defined in and - * are subject to the Apple Public Source License Version 1.1 (the - * "License"). You may not use this file except in compliance with the - * License. Please obtain a copy of the License at - * http://www.apple.com/publicsource and read it before using this file. - * - * This Original Code and all software distributed under the License are - * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the - * License for the specific language governing rights and limitations - * under the License. - * - * @APPLE_LICENSE_HEADER_END@ - */ -/* - * Copyright (c) 1992 NeXT Computer, Inc. - * - * Intel386 Family: Floating Point unit. - * - * HISTORY - * - * 5 October 1992 ? at NeXT - * Added names to previously unamed fields in the mantissa. - * - * 5 April 1992 ? at NeXT - * Created. - */ - -/* - * Data register. - */ - -typedef struct fp_data_reg { - unsigned short mant; - unsigned short mant1 :16, - mant2 :16, - mant3 :16; - unsigned short exp :15, - sign :1; -} fp_data_reg_t; - -/* - * Data register stack. - */ - -typedef struct fp_stack { - fp_data_reg_t ST[8]; -} fp_stack_t; - -/* - * Register stack tag word. - */ - -typedef struct fp_tag { - unsigned short tag0 :2, - tag1 :2, - tag2 :2, - tag3 :2, - tag4 :2, - tag5 :2, - tag6 :2, - tag7 :2; -#define FP_TAG_VALID 0 -#define FP_TAG_ZERO 1 -#define FP_TAG_SPEC 2 -#define FP_TAG_EMPTY 3 -} fp_tag_t; - -/* - * Status word. 
- */ - -typedef struct fp_status { - unsigned short invalid :1, - denorm :1, - zdiv :1, - ovrfl :1, - undfl :1, - precis :1, - stkflt :1, - errsumm :1, - c0 :1, - c1 :1, - c2 :1, - tos :3, - c3 :1, - busy :1; -} fp_status_t; - -/* - * Control word. - */ - -typedef struct fp_control { - unsigned short invalid :1, - denorm :1, - zdiv :1, - ovrfl :1, - undfl :1, - precis :1, - :2, - pc :2, -#define FP_PREC_24B 0 -#define FP_PREC_53B 2 -#define FP_PREC_64B 3 - rc :2, -#define FP_RND_NEAR 0 -#define FP_RND_DOWN 1 -#define FP_RND_UP 2 -#define FP_CHOP 3 - /*inf*/ :1, - :3; -} fp_control_t; - -#include - -/* - * Floating point 'environment' - * used by FSTENV/FLDENV instructions. - */ - -typedef struct fp_env { - fp_control_t control; - unsigned short :16; - fp_status_t status; - unsigned short :16; - fp_tag_t tag; - unsigned short :16; - unsigned int ip; - sel_t cs; - unsigned short opcode; - unsigned int dp; - sel_t ds; - unsigned short :16; -} fp_env_t; - -/* - * Floating point state - * used by FSAVE/FRSTOR instructions. - */ - -typedef struct fp_state { - fp_env_t environ; - fp_stack_t stack; -} fp_state_t; diff --git a/EXTERNAL_HEADERS/architecture/i386/frame.h b/EXTERNAL_HEADERS/architecture/i386/frame.h deleted file mode 100644 index ec5604667..000000000 --- a/EXTERNAL_HEADERS/architecture/i386/frame.h +++ /dev/null @@ -1,125 +0,0 @@ -/* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. - * - * @APPLE_LICENSE_HEADER_START@ - * - * The contents of this file constitute Original Code as defined in and - * are subject to the Apple Public Source License Version 1.1 (the - * "License"). You may not use this file except in compliance with the - * License. Please obtain a copy of the License at - * http://www.apple.com/publicsource and read it before using this file. 
- * - * This Original Code and all software distributed under the License are - * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the - * License for the specific language governing rights and limitations - * under the License. - * - * @APPLE_LICENSE_HEADER_END@ - */ -/* - * Copyright (c) 1992 NeXT Computer, Inc. - * - * Intel386 Family: Processor exception frame. - * - * HISTORY - * - * 31 August 1992 ? at NeXT - * Added v86 mode stuff. - * - * 8 June 1992 ? at NeXT - * Changed name of write field in err_code_t - * which collided with write() in shlib. - * - * 30 March 1992 ? at NeXT - * Created. - */ - -/* - * Format of the error code - * generated by the hardware - * for certain exceptions. - */ - -typedef union err_code { - struct err_code_normal { - unsigned int ext :1, - tbl :2, -#define ERR_GDT 0 -#define ERR_IDT 1 -#define ERR_LDT 2 - index :13, - :16; - } normal; - struct err_code_pgfault { - unsigned int prot :1, - wrtflt :1, - user :1, - :29; - } pgfault; -} err_code_t; - -#include - -/* - * The actual hardware exception frame - * is variable in size. An error code is - * only pushed for certain exceptions. - * Previous stack information is only - * pushed for exceptions that cause a - * change in privilege level. The dpl - * field of the saved CS selector can be - * used to determine whether this is the - * case. If the interrupted task was - * executing in v86 mode, then the data - * segment registers are also present in - * the exception frame (in addition to - * previous stack information). This - * case can be determined by examining - * eflags. 
- */ - -typedef struct except_frame { - err_code_t err; - unsigned int eip; - sel_t cs; - unsigned int :0; - unsigned int eflags; - unsigned int esp; - sel_t ss; - unsigned int :0; - unsigned short v_es; - unsigned int :0; - unsigned short v_ds; - unsigned int :0; - unsigned short v_fs; - unsigned int :0; - unsigned short v_gs; - unsigned int :0; -} except_frame_t; - -/* - * Values in eflags. - */ - -#ifndef EFL_CF /* FIXME */ -#define EFL_CF 0x00001 -#define EFL_PF 0x00004 -#define EFL_AF 0x00010 -#define EFL_ZF 0x00040 -#define EFL_SF 0x00080 -#define EFL_TF 0x00100 -#define EFL_IF 0x00200 -#define EFL_DF 0x00400 -#define EFL_OF 0x00800 -#define EFL_IOPL 0x03000 -#define EFL_NT 0x04000 -#define EFL_RF 0x10000 -#define EFL_VM 0x20000 -#define EFL_AC 0x40000 -#endif - -#define EFL_CLR 0xfff88028 -#define EFL_SET 0x00000002 diff --git a/EXTERNAL_HEADERS/architecture/i386/pio.h b/EXTERNAL_HEADERS/architecture/i386/pio.h new file mode 100644 index 000000000..5fb0fafcf --- /dev/null +++ b/EXTERNAL_HEADERS/architecture/i386/pio.h @@ -0,0 +1,103 @@ +/* + * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * The contents of this file constitute Original Code as defined in and + * are subject to the Apple Public Source License Version 1.1 (the + * "License"). You may not use this file except in compliance with the + * License. Please obtain a copy of the License at + * http://www.apple.com/publicsource and read it before using this file. + * + * This Original Code and all software distributed under the License are + * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the + * License for the specific language governing rights and limitations + * under the License. 
+ * + * @APPLE_LICENSE_HEADER_END@ + */ +/* + * @OSF_COPYRIGHT@ + */ +/* + * Mach Operating System + * Copyright (c) 1991,1990 Carnegie Mellon University + * All Rights Reserved. + * + * Permission to use, copy, modify and distribute this software and its + * documentation is hereby granted, provided that both the copyright + * notice and this permission notice appear in all copies of the + * software, derivative works or modified versions, and any portions + * thereof, and that both notices appear in supporting documentation. + * + * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" + * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR + * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. + * + * Carnegie Mellon requests users of this software to return to + * + * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU + * School of Computer Science + * Carnegie Mellon University + * Pittsburgh PA 15213-3890 + * + * any improvements or extensions that they make and grant Carnegie Mellon + * the rights to redistribute these changes. 
+ */ +/* + */ +#ifndef _ARCH_I386_PIO_H_ +#define _ARCH_I386_PIO_H_ + +typedef unsigned short i386_ioport_t; + +#if defined(__GNUC__) +static __inline__ unsigned long inl( + i386_ioport_t port) +{ + unsigned long datum; + __asm__ volatile("inl %w1, %0" : "=a" (datum) : "Nd" (port)); + return(datum); +} + +static __inline__ unsigned short inw( + i386_ioport_t port) +{ + unsigned short datum; + __asm__ volatile("inw %w1, %w0" : "=a" (datum) : "Nd" (port)); + return(datum); +} + +static __inline__ unsigned char inb( + i386_ioport_t port) +{ + unsigned char datum; + __asm__ volatile("inb %w1, %b0" : "=a" (datum) : "Nd" (port)); + return(datum); +} + +static __inline__ void outl( + i386_ioport_t port, + unsigned long datum) +{ + __asm__ volatile("outl %0, %w1" : : "a" (datum), "Nd" (port)); +} + +static __inline__ void outw( + i386_ioport_t port, + unsigned short datum) +{ + __asm__ volatile("outw %w0, %w1" : : "a" (datum), "Nd" (port)); +} + +static __inline__ void outb( + i386_ioport_t port, + unsigned char datum) +{ + __asm__ volatile("outb %b0, %w1" : : "a" (datum), "Nd" (port)); +} +#endif /* defined(__GNUC__) */ +#endif /* _ARCH_I386_PIO_H_ */ diff --git a/EXTERNAL_HEADERS/architecture/ppc/Makefile b/EXTERNAL_HEADERS/architecture/ppc/Makefile index 8cf3b09c2..374f3bd9a 100644 --- a/EXTERNAL_HEADERS/architecture/ppc/Makefile +++ b/EXTERNAL_HEADERS/architecture/ppc/Makefile @@ -12,7 +12,6 @@ INSTINC_SUBDIRS_PPC = EXPORT_FILES = \ asm_help.h \ basic_regs.h \ - byte_order.h \ cframe.h \ fp_regs.h \ macro_help.h \ diff --git a/EXTERNAL_HEADERS/architecture/ppc/byte_order.h b/EXTERNAL_HEADERS/architecture/ppc/byte_order.h deleted file mode 100644 index 4669264e4..000000000 --- a/EXTERNAL_HEADERS/architecture/ppc/byte_order.h +++ /dev/null @@ -1,199 +0,0 @@ -/* - * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. 
- * - * @APPLE_LICENSE_HEADER_START@ - * - * The contents of this file constitute Original Code as defined in and - * are subject to the Apple Public Source License Version 1.1 (the - * "License"). You may not use this file except in compliance with the - * License. Please obtain a copy of the License at - * http://www.apple.com/publicsource and read it before using this file. - * - * This Original Code and all software distributed under the License are - * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the - * License for the specific language governing rights and limitations - * under the License. - * - * @APPLE_LICENSE_HEADER_END@ - */ -/* - * Copyright (c) 1996 NeXT Software, Inc. - * - * Byte ordering conversion (for ppc). - */ - -static __inline__ -unsigned short -NXSwapShort( - unsigned short inv -) -{ - union sconv { - unsigned short us; - unsigned char uc[2]; - } *inp, outv; - - inp = (union sconv *)&inv; - - outv.uc[0] = inp->uc[1]; - outv.uc[1] = inp->uc[0]; - - return (outv.us); -} - -static __inline__ -unsigned int -NXSwapInt( - unsigned int inv -) -{ - union iconv { - unsigned int ui; - unsigned char uc[4]; - } *inp, outv; - - inp = (union iconv *)&inv; - - outv.uc[0] = inp->uc[3]; - outv.uc[1] = inp->uc[2]; - outv.uc[2] = inp->uc[1]; - outv.uc[3] = inp->uc[0]; - - return (outv.ui); -} - -static __inline__ -unsigned long long -NXSwapLongLong( - unsigned long long inv -) -{ - union llconv { - unsigned long long ull; - unsigned char uc[8]; - } *inp, outv; - - inp = (union llconv *)&inv; - - outv.uc[0] = inp->uc[7]; - outv.uc[1] = inp->uc[6]; - outv.uc[2] = inp->uc[5]; - outv.uc[3] = inp->uc[4]; - outv.uc[4] = inp->uc[3]; - outv.uc[5] = inp->uc[2]; - outv.uc[6] = inp->uc[1]; - outv.uc[7] = inp->uc[0]; - - return (outv.ull); 
-} - -#if defined(__LP64__) - -static __inline__ -unsigned long -NXSwapLong( - unsigned long inv -) -{ - union llconv { - unsigned long ul; - unsigned char uc[8]; - } *inp, outv; - - inp = (union llconv *)&inv; - - outv.uc[0] = inp->uc[7]; - outv.uc[1] = inp->uc[6]; - outv.uc[2] = inp->uc[5]; - outv.uc[3] = inp->uc[4]; - outv.uc[4] = inp->uc[3]; - outv.uc[5] = inp->uc[2]; - outv.uc[6] = inp->uc[1]; - outv.uc[7] = inp->uc[0]; - - return (outv.ul); -} - -#else - -static __inline__ -unsigned long -NXSwapLong( - unsigned long inv -) -{ - union lconv { - unsigned long ul; - unsigned char uc[4]; - } *inp, outv; - - inp = (union lconv *)&inv; - - outv.uc[0] = inp->uc[3]; - outv.uc[1] = inp->uc[2]; - outv.uc[2] = inp->uc[1]; - outv.uc[3] = inp->uc[0]; - - return (outv.ul); -} - -#endif /* __LP64__ */ - -#ifndef KERNEL - -static __inline__ NXSwappedFloat -NXConvertHostFloatToSwapped(float x) -{ - union fconv { - float number; - NXSwappedFloat sf; - }; - return ((union fconv *)&x)->sf; -} - -static __inline__ float -NXConvertSwappedFloatToHost(NXSwappedFloat x) -{ - union fconv { - float number; - NXSwappedFloat sf; - }; - return ((union fconv *)&x)->number; -} - -static __inline__ NXSwappedDouble -NXConvertHostDoubleToSwapped(double x) -{ - union dconv { - double number; - NXSwappedDouble sd; - }; - return ((union dconv *)&x)->sd; -} - -static __inline__ double -NXConvertSwappedDoubleToHost(NXSwappedDouble x) -{ - union dconv { - double number; - NXSwappedDouble sd; - }; - return ((union dconv *)&x)->number; -} - -static __inline__ NXSwappedFloat -NXSwapFloat(NXSwappedFloat x) -{ - return NXSwapLong(x); -} - -static __inline__ NXSwappedDouble -NXSwapDouble(NXSwappedDouble x) -{ - return NXSwapLongLong(x); -} - -#endif /* ! 
KERNEL */ diff --git a/EXTERNAL_HEADERS/mach-o/fat.h b/EXTERNAL_HEADERS/mach-o/fat.h index e964a1965..1a3312ae1 100644 --- a/EXTERNAL_HEADERS/mach-o/fat.h +++ b/EXTERNAL_HEADERS/mach-o/fat.h @@ -39,10 +39,9 @@ * and contains the constants for the possible values of these types. */ #include -#include #define FAT_MAGIC 0xcafebabe -#define FAT_CIGAM NXSwapLong(FAT_MAGIC) +#define FAT_CIGAM 0xbebafeca struct fat_header { unsigned long magic; /* FAT_MAGIC */ diff --git a/EXTERNAL_HEADERS/mach-o/loader.h b/EXTERNAL_HEADERS/mach-o/loader.h index d0d148fa9..5b917af6a 100644 --- a/EXTERNAL_HEADERS/mach-o/loader.h +++ b/EXTERNAL_HEADERS/mach-o/loader.h @@ -43,7 +43,6 @@ * states and the structures of those flavors for each machine. */ #include -#include /* * The mach header appears at the very beginning of the object file; it @@ -76,11 +75,11 @@ struct mach_header_64 { /* Constant for the magic field of the mach_header (32-bit architectures) */ #define MH_MAGIC 0xfeedface /* the mach magic number */ -#define MH_CIGAM NXSwapInt(MH_MAGIC) +#define MH_CIGAM 0xcefaedfe /* Constant for the magic field of the mach_header_64 (64-bit architectures) */ #define MH_MAGIC_64 0xfeedfacf /* the 64-bit mach magic number */ -#define MH_CIGAM_64 NXSwapInt(MH_MAGIC_64) +#define MH_CIGAM_64 0xcffaedfe /* Constants for the cmd field of new load commands, the type */ #define LC_SEGMENT_64 0x19 /* 64-bit segment of this file to be mapped */ @@ -133,6 +132,10 @@ struct mach_header_64 { linker when loaded. */ #define MH_PREBOUND 0x10 /* the file has it's dynamic undefined references prebound. */ +#define MH_ALLOW_STACK_EXECUTION 0x20000/* When this bit is set, all stacks + in the task will be given stack + execution privilege. Only used in + MH_EXECUTE filetypes. */ /* * The load commands directly follow the mach_header. 
The total size of all diff --git a/EXTERNAL_HEADERS/stdbool.h b/EXTERNAL_HEADERS/stdbool.h new file mode 100644 index 000000000..be545ce1a --- /dev/null +++ b/EXTERNAL_HEADERS/stdbool.h @@ -0,0 +1,46 @@ +/* + * Copyright (c) 2000 Jeroen Ruigrok van der Werven + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * $FreeBSD: src/include/stdbool.h,v 1.6 2002/08/16 07:33:14 alfred Exp $ + */ + +#ifndef _STDBOOL_H_ +#define _STDBOOL_H_ + +#define __bool_true_false_are_defined 1 + +#ifndef __cplusplus + +#define false 0 +#define true 1 + +#define bool _Bool +#if __STDC_VERSION__ < 199901L && __GNUC__ < 3 +typedef int _Bool; +#endif + +#endif /* !__cplusplus */ + +#endif /* !_STDBOOL_H_ */ diff --git a/bsd/bsm/audit_kevents.h b/bsd/bsm/audit_kevents.h index 3ffd93037..73bbf2ce2 100644 --- a/bsd/bsm/audit_kevents.h +++ b/bsd/bsm/audit_kevents.h @@ -324,6 +324,7 @@ #define AUE_SWAPOFF 355 #define AUE_INITPROCESS 356 #define AUE_MAPFD 357 +#define AUE_TASKNAMEFORPID AUE_NULL #define AUE_TASKFORPID 358 #define AUE_PIDFORTASK 359 #define AUE_SYSCTL_NONADMIN 360 diff --git a/bsd/conf/MASTER b/bsd/conf/MASTER index 90cb6ba5c..9ccacbf12 100644 --- a/bsd/conf/MASTER +++ b/bsd/conf/MASTER @@ -182,7 +182,7 @@ options NFSSERVER # Be an NFS server # # AppleTalk Support # options NETAT # AppleTalk support # -options AURP_SUPPORT # AppleTalk Update Routing # +#options AURP_SUPPORT # AppleTalk Update Routing # # # Machine Independent Apple Features # diff --git a/bsd/conf/MASTER.i386 b/bsd/conf/MASTER.i386 index 6c9d460bc..d7d200932 100644 --- a/bsd/conf/MASTER.i386 +++ b/bsd/conf/MASTER.i386 @@ -45,9 +45,9 @@ # Standard Apple Research Configurations: # -------- ----- -------- --------------- # -# RELEASE = [intel pc mach medium event vol pst gdb kernobjc libdriver fixpri simple_clock mdebug kernserv driverkit uxpr kernstack ipc_compat ipc_debug nfsclient nfsserver quota fifo fdesc union ffs cd9660 compat_oldsock volfs devfs revfs hfs mrouting ipdivert ipfirewall ipfw2 dummynet ipv6firewall inet6 ipsec gif tcpdrop_synfin ktrace stf compat_43_tty compat_43_socket vlan bond netmibs] +# RELEASE = [intel pc mach medium event vol pst gdb kernobjc libdriver fixpri simple_clock mdebug kernserv driverkit uxpr kernstack ipc_compat ipc_debug nfsclient nfsserver quota fifo fdesc union ffs cd9660 
compat_oldsock volfs devfs revfs hfs mrouting ipdivert ipfirewall ipfw2 dummynet ipv6firewall inet6 ipsec gif tcpdrop_synfin ktrace stf compat_43_tty compat_43_socket vlan bond netmibs netat] # PROFILE = [RELEASE profile] -# DEBUG = [intel pc mach medium event vol pst gdb kernobjc libdriver_g fixpri debug simple_clock mdebug kernserv driverkit xpr_debug uxpr kernstack ipc_compat ipc_debug nfsclient nfsserver quota fifo fdesc union ffs cd9660 compat_oldsock revfs hfs volfs devfs mach_assert mrouting ipdivert ipfirewall ipfw2 dummynet ipv6firewall inet6 ipsec gif tcpdrop_synfin ktrace stf compat_43_tty compat_43_socket vlan bond netmibs] +# DEBUG = [intel pc mach medium event vol pst gdb kernobjc libdriver_g fixpri debug simple_clock mdebug kernserv driverkit xpr_debug uxpr kernstack ipc_compat ipc_debug nfsclient nfsserver quota fifo fdesc union ffs cd9660 compat_oldsock revfs hfs volfs devfs mach_assert mrouting ipdivert ipfirewall ipfw2 dummynet ipv6firewall inet6 ipsec gif tcpdrop_synfin ktrace stf compat_43_tty compat_43_socket vlan bond netmibs netat] # ###################################################################### # diff --git a/bsd/conf/Makefile.i386 b/bsd/conf/Makefile.i386 index 0b3a6a0d1..2eb348c41 100644 --- a/bsd/conf/Makefile.i386 +++ b/bsd/conf/Makefile.i386 @@ -290,7 +290,75 @@ OBJS_NO_WERROR = \ sysctl.o \ unix_startup.o \ memdev.o \ - init_sysent.o + randomdev.o \ + sha1mod.o \ + inet_ntop.o \ + vfs_attrlist.o \ + volfs_vnops.o \ + bpf_filter.o \ + devtimer.o \ + in_arp.o \ + ip_ecn.o \ + key_debug.o \ + nfs_serv.o \ + nfs_lock.o \ + kern_lockf.o \ + kern_subr.o \ + sysv_msg.o \ + vnode_pager.o \ + dp_backing_file.o \ + vm_unix.o \ + param.o \ + mem.o \ + km.o \ + init_sysent.o \ + at.o \ + adsp.o \ + adsp_CLDeny.o \ + adsp_CLListen.o \ + adsp_Close.o \ + adsp_Control.o \ + adsp_Init.o \ + adsp_InitGlobals.o \ + adsp_NewCID.o \ + adsp_Open.o \ + adsp_Options.o \ + adsp_Packet.o \ + adsp_Read.o \ + adsp_RxAttn.o \ + adsp_RxData.o \ + 
adsp_Status.o \ + adsp_Timer.o \ + adsp_TimerElem.o \ + adsp_Write.o \ + adsp_attention.o \ + adsp_misc.o \ + adsp_reset.o \ + adsp_stream.o \ + at_proto.o \ + at_pcb.o \ + atp_alloc.o \ + atp_misc.o \ + atp_open.o \ + atp_read.o \ + atp_write.o \ + ddp_aarp.o \ + ddp_aep.o \ + ddp_brt.o \ + ddp_lap.o \ + ddp_nbp.o \ + ddp_proto.o \ + ddp_r_rtmp.o \ + ddp_r_zip.o \ + ddp_rtmp.o \ + ddp_rtmptable.o \ + ddp_sip.o \ + ddp.o \ + sys_dep.o \ + sys_glue.o \ + drv_dep.o \ + ddp_usrreq.o \ + asp_proto.o OBJS_WERROR=$(filter-out $(OBJS_NO_WERROR),$(OBJS)) diff --git a/bsd/conf/files b/bsd/conf/files index 2b6779be7..d15aa03ea 100644 --- a/bsd/conf/files +++ b/bsd/conf/files @@ -547,6 +547,8 @@ bsd/kern/posix_shm.c standard bsd/kern/qsort.c standard bsd/kern/kpi_socket.c standard bsd/kern/kpi_socketfilter.c standard +bsd/kern/proc_info.c standard +bsd/kern/socket_info.c standard bsd/vm/vnode_pager.c standard bsd/vm/vm_unix.c standard @@ -556,3 +558,6 @@ bsd/uxkern/ux_exception.c standard bsd/conf/param.c standard ./ioconf.c standard + +bsd/dev/chud/chud_bsd_callback.c standard +bsd/dev/chud/chud_process.c standard diff --git a/bsd/conf/files.i386 b/bsd/conf/files.i386 index cc998565b..3eb14dd0d 100644 --- a/bsd/conf/files.i386 +++ b/bsd/conf/files.i386 @@ -10,9 +10,11 @@ bsd/dev/i386/km.c standard bsd/dev/i386/kern_machdep.c standard bsd/dev/i386/memmove.c standard bsd/dev/i386/stubs.c standard +bsd/dev/i386/systemcalls.c standard bsd/dev/i386/lock_stubs.c standard bsd/dev/i386/sysctl.c standard bsd/dev/i386/unix_signal.c standard +bsd/dev/i386/munge.s standard bsd/kern/bsd_stubs.c standard diff --git a/bsd/conf/files.ppc b/bsd/conf/files.ppc index 36adc9a65..c24d147e5 100644 --- a/bsd/conf/files.ppc +++ b/bsd/conf/files.ppc @@ -19,8 +19,5 @@ bsd/dev/ppc/xsumas.s standard bsd/dev/ppc/sysctl.c standard bsd/dev/ppc/munge.s standard -bsd/dev/ppc/chud/chud_bsd_callback.c standard -bsd/dev/ppc/chud/chud_process.c standard - bsd/kern/bsd_stubs.c standard diff --git 
a/bsd/conf/param.c b/bsd/conf/param.c index 0aede52e9..2a4e0e347 100644 --- a/bsd/conf/param.c +++ b/bsd/conf/param.c @@ -79,8 +79,9 @@ struct timezone tz = { TIMEZONE, PST }; #define NPROC (20 + 16 * MAXUSERS) -#define HNPROC (20 + 64 * MAXUSERS) +#define HNPROC 2500 /* based on thread_max */ int maxproc = NPROC; +int maxprocperuid = NPROC/2; __private_extern__ int hard_maxproc = HNPROC; /* hardcoded limit */ int nprocs = 0; /* XXX */ diff --git a/bsd/dev/ppc/chud/chud_bsd_callback.c b/bsd/dev/chud/chud_bsd_callback.c similarity index 52% rename from bsd/dev/ppc/chud/chud_bsd_callback.c rename to bsd/dev/chud/chud_bsd_callback.c index 0302458f5..bc2f00422 100644 --- a/bsd/dev/ppc/chud/chud_bsd_callback.c +++ b/bsd/dev/chud/chud_bsd_callback.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2003 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2003-2005 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -34,18 +34,30 @@ typedef void (*chudxnu_kdebug_callback_func_t)(uint32_t debugid, uint32_t arg0, uint32_t arg1, uint32_t arg2, uint32_t arg3, uint32_t arg4); static chudxnu_kdebug_callback_func_t kdebug_callback_fn = NULL; -extern void kdbg_control_chudxnu(int val, void *fn); +kern_return_t chudxnu_kdebug_callback_enter(chudxnu_kdebug_callback_func_t); +kern_return_t chudxnu_kdebug_callback_cancel(void); + +extern void kdbg_control_chud(int val, void *fn); extern unsigned int kdebug_enable; -static void chudxnu_private_kdebug_callback(unsigned int debugid, unsigned int arg0, unsigned int arg1, unsigned int arg2, unsigned int arg3, unsigned int arg4) +static void +chudxnu_private_kdebug_callback( + unsigned int debugid, + unsigned int arg0, + unsigned int arg1, + unsigned int arg2, + unsigned int arg3, + unsigned int arg4) { - if(kdebug_callback_fn) { - (kdebug_callback_fn)(debugid, arg0, arg1, arg2, arg3, arg4); + chudxnu_kdebug_callback_func_t fn = kdebug_callback_fn; + + if(fn) { + (fn)(debugid, arg0, arg1, arg2, arg3, arg4); } } 
-__private_extern__ -kern_return_t chudxnu_kdebug_callback_enter(chudxnu_kdebug_callback_func_t func) +__private_extern__ kern_return_t +chudxnu_kdebug_callback_enter(chudxnu_kdebug_callback_func_t func) { kdebug_callback_fn = func; @@ -55,8 +67,8 @@ kern_return_t chudxnu_kdebug_callback_enter(chudxnu_kdebug_callback_func_t func) return KERN_SUCCESS; } -__private_extern__ -kern_return_t chudxnu_kdebug_callback_cancel(void) +__private_extern__ kern_return_t +chudxnu_kdebug_callback_cancel(void) { kdebug_callback_fn = NULL; kdbg_control_chud(FALSE, NULL); @@ -64,3 +76,42 @@ kern_return_t chudxnu_kdebug_callback_cancel(void) return KERN_SUCCESS; } + +#pragma mark **** CHUD syscall **** +typedef kern_return_t (*chudxnu_syscall_callback_func_t)(uint32_t code, uint32_t arg0, uint32_t arg1, uint32_t arg2, uint32_t arg3, uint32_t arg4); +static chudxnu_syscall_callback_func_t syscall_callback_fn = NULL; + +kern_return_t chudxnu_syscall_callback_enter(chudxnu_syscall_callback_func_t func); +kern_return_t chudxnu_syscall_callback_cancel(void); + +int chud(p, uap, retval) + struct proc *p; + struct chud_args *uap; + register_t *retval; +{ +#pragma unused (p) + + chudxnu_syscall_callback_func_t fn = syscall_callback_fn; + + if(!fn) { + return EINVAL; + } + + *retval = fn(uap->code, uap->arg1, uap->arg2, uap->arg3, uap->arg4, uap->arg5); + + return 0; +} + +__private_extern__ +kern_return_t chudxnu_syscall_callback_enter(chudxnu_syscall_callback_func_t func) +{ + syscall_callback_fn = func; + return KERN_SUCCESS; +} + +__private_extern__ +kern_return_t chudxnu_syscall_callback_cancel(void) +{ + syscall_callback_fn = NULL; + return KERN_SUCCESS; +} diff --git a/bsd/dev/ppc/chud/chud_process.c b/bsd/dev/chud/chud_process.c similarity index 64% rename from bsd/dev/ppc/chud/chud_process.c rename to bsd/dev/chud/chud_process.c index 1fad77e4c..c409c6275 100644 --- a/bsd/dev/ppc/chud/chud_process.c +++ b/bsd/dev/chud/chud_process.c @@ -23,9 +23,15 @@ #include #include #include // 
vn_getpath() +#include +#include -__private_extern__ -int chudxnu_pid_for_task(task_t task) +int chudxnu_pid_for_task(task_t task); +task_t chudxnu_task_for_pid(int pid); +int chudxnu_current_pid(void); + +__private_extern__ int +chudxnu_pid_for_task(task_t task) { struct proc *p; @@ -38,8 +44,8 @@ int chudxnu_pid_for_task(task_t task) return -1; } -__private_extern__ -task_t chudxnu_task_for_pid(int pid) +__private_extern__ task_t +chudxnu_task_for_pid(int pid) { struct proc *p = pfind(pid); if(p) { @@ -48,8 +54,23 @@ task_t chudxnu_task_for_pid(int pid) return TASK_NULL; } -__private_extern__ -int chudxnu_current_pid(void) +__private_extern__ int +chudxnu_current_pid(void) { - return current_proc()->p_pid; + int pid = -1; + struct uthread *ut = get_bsdthread_info(current_thread()); + task_t t = current_task(); + + if(t != TASK_NULL) { + pid = chudxnu_pid_for_task(t); + } else { + // no task, so try looking in the uthread and/or proc + pid = current_proc()->p_pid; + + if(ut && ut->uu_proc) { + pid = ut->uu_proc->p_pid; + } + } + + return pid; } diff --git a/bsd/dev/i386/kern_machdep.c b/bsd/dev/i386/kern_machdep.c index a78df67ed..6ff80a540 100644 --- a/bsd/dev/i386/kern_machdep.c +++ b/bsd/dev/i386/kern_machdep.c @@ -31,92 +31,27 @@ #include #include #include - -extern int grade_binary(cpu_type_t exectype, cpu_subtype_t execsubtype); +#include +#include /********************************************************************** * Routine: grade_binary() * - * Function: Return a relative preference for exectypes and - * execsubtypes in fat executable files. The higher the - * grade, the higher the preference. A grade of 0 means - * not acceptable. + * Function: Keep the API the same between PPC and X86; always say + * any CPU subtype is OK with us, but only OK CPU types + * for which we are actually capable of executing the + * binary, either directly or via an imputed interpreter. 
**********************************************************************/ int -grade_binary(__unused cpu_type_t exectype, cpu_subtype_t execsubtype) +grade_binary(cpu_type_t exectype, cpu_subtype_t execsubtype) { - int cpusubtype = cpu_subtype(); - - switch (cpusubtype) { - case CPU_SUBTYPE_386: - switch (execsubtype) { - case CPU_SUBTYPE_386: - return 1; - default: - return 0; - } - - case CPU_SUBTYPE_486: - switch (execsubtype) { - case CPU_SUBTYPE_386: - return 1; - - case CPU_SUBTYPE_486SX: - return 2; - - case CPU_SUBTYPE_486: - return 3; - - default: - return 0; - } - - case CPU_SUBTYPE_486SX: - switch (execsubtype) { - case CPU_SUBTYPE_386: - return 1; - - case CPU_SUBTYPE_486: - return 2; - - case CPU_SUBTYPE_486SX: - return 3; - - default: - return 0; - } - - case CPU_SUBTYPE_586: - switch (execsubtype) { - case CPU_SUBTYPE_386: - return 1; - - case CPU_SUBTYPE_486SX: - return 2; - - case CPU_SUBTYPE_486: - return 3; - - case CPU_SUBTYPE_586: - return 4; - - default: - return 0; - } - - default: - if ( CPU_SUBTYPE_INTEL_MODEL(execsubtype) == - CPU_SUBTYPE_INTEL_MODEL_ALL) { - if ( CPU_SUBTYPE_INTEL_FAMILY(cpusubtype) >= - CPU_SUBTYPE_INTEL_FAMILY(execsubtype)) - return CPU_SUBTYPE_INTEL_FAMILY_MAX - - CPU_SUBTYPE_INTEL_FAMILY(cpusubtype) - - CPU_SUBTYPE_INTEL_FAMILY(execsubtype); - } - else { - if ( cpusubtype == execsubtype) - return CPU_SUBTYPE_INTEL_FAMILY_MAX + 1; - } + switch(exectype) { + case CPU_TYPE_X86: /* native */ + case CPU_TYPE_POWERPC: /* via translator */ + return 1; + case CPU_TYPE_X86_64: /* native 64-bit */ + return (ml_is64bit() && execsubtype == CPU_SUBTYPE_X86_64_ALL) ? 
2 : 0; + default: /* all other binary types */ return 0; } } diff --git a/bsd/dev/i386/km.c b/bsd/dev/i386/km.c index 6de6ff0bb..a75e4ea54 100644 --- a/bsd/dev/i386/km.c +++ b/bsd/dev/i386/km.c @@ -40,6 +40,7 @@ #include #include #include +#include extern int hz; @@ -135,7 +136,10 @@ kmopen( bzero(&video, sizeof(video)); PE_current_console(&video); - if( video.v_width != 0 && video.v_height != 0 ) { + if( video.v_display == VGA_TEXT_MODE ) { + wp->ws_col = video.v_width; + wp->ws_row = video.v_height; + } else if( video.v_width != 0 && video.v_height != 0 ) { wp->ws_col = video.v_width / wp->ws_xpixel; wp->ws_row = video.v_height / wp->ws_ypixel; } else { diff --git a/bsd/dev/i386/mem.c b/bsd/dev/i386/mem.c index 892be2473..7bcebdae6 100644 --- a/bsd/dev/i386/mem.c +++ b/bsd/dev/i386/mem.c @@ -79,8 +79,9 @@ #include #include /* for kernel_map */ -extern vm_offset_t kvtophys(vm_offset_t va); +extern addr64_t kvtophys(vm_offset_t va); extern boolean_t kernacc(off_t, size_t ); +extern int setup_kmem; static caddr_t devzerobuf; @@ -104,9 +105,14 @@ mmwrite(dev_t dev, struct uio *uio) } int -mmioctl(__unused dev_t dev, u_long cmd, __unused caddr_t data, +mmioctl(dev_t dev, u_long cmd, __unused caddr_t data, __unused int flag, __unused struct proc *p) { + int minnum = minor(dev); + + if ((setup_kmem == 0) && ((minnum == 0) || (minnum == 1))) + return(EINVAL); + switch (cmd) { case FIONBIO: case FIOASYNC: @@ -128,6 +134,7 @@ mmrw(dev_t dev, struct uio *uio, enum uio_rw rw) vm_offset_t where; vm_size_t size; + while (uio_resid(uio) > 0 && error == 0) { if (uio_iov_len(uio) == 0) { uio_next_iov(uio); @@ -140,6 +147,9 @@ mmrw(dev_t dev, struct uio *uio, enum uio_rw rw) /* minor device 0 is physical memory */ case 0: + if (setup_kmem == 0) + return(ENODEV); + v = trunc_page(uio->uio_offset); if (uio->uio_offset >= mem_size) goto fault; @@ -158,6 +168,8 @@ mmrw(dev_t dev, struct uio *uio, enum uio_rw rw) /* minor device 1 is kernel memory */ case 1: + if (setup_kmem == 0) + 
return(ENODEV); /* Do some sanity checking */ if (((vm_address_t)uio->uio_offset >= VM_MAX_KERNEL_ADDRESS) || ((vm_address_t)uio->uio_offset <= VM_MIN_KERNEL_ADDRESS)) diff --git a/bsd/dev/i386/munge.s b/bsd/dev/i386/munge.s new file mode 100644 index 000000000..5a4dc52b2 --- /dev/null +++ b/bsd/dev/i386/munge.s @@ -0,0 +1,205 @@ +/* + * Copyright (c) 2005-2006 Apple Computer, Inc. All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * The contents of this file constitute Original Code as defined in and + * are subject to the Apple Public Source License Version 1.1 (the + * "License"). You may not use this file except in compliance with the + * License. Please obtain a copy of the License at + * http://www.apple.com/publicsource and read it before using this file. + * + * This Original Code and all software distributed under the License are + * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the + * License for the specific language governing rights and limitations + * under the License. + * + * @APPLE_LICENSE_HEADER_END@ + */ + +/* + * Syscall argument mungers. + * + * The data to be munged has been explicitly copied in to the argument area, + * and will be munged in place in the uu_arg[] array. Because of this, the + * functions all take the same arguments as their PPC equivalents, but the + * first argument is ignored. These mungers are for 32-bit app's syscalls, + * since 64-bit args are stored into the save area (which overlays the + * uu_args) in the order the syscall ABI calls for. + * + * The issue is that the incoming args are 32-bit, but we must expand + * them in place into 64-bit args, as if they were from a 64-bit process. + * + * There are several functions in this file. 
Each takes two parameters: + * + * void munge_XXXX( const void *regs, void *uu_args); + * + * The name of the function encodes the number and type of the parameters, + * as follows: + * + * w = a 32-bit value such as an int or a 32-bit ptr, that does not + * require sign extension. These are handled by zeroing a word + * of output, and copying a word from input to output. + * + * s = a 32-bit value such as a long, which must be sign-extended to + * a 64-bit long-long in the uu_args. These are handled by + * loading a word of input and sign extending it to a double, + * and storing two words of output. + * + * l = a 64-bit long-long. These are handled by copying two words + * of input to the output. + * + * For example, "munge_wls" takes a word, a long-long, and a word. This + * takes four words in the uu_arg[] area: the first word is in one, the + * long-long takes two, and the final word is in the fourth. We store six + * words: the low word is left in place, followed by a 0, followed by the + * two words of the long-long, followed by the low word and the sign extended + * high word of the preceding low word. + * + * Because this is an in-place modification, we actually start at the end + * of uu_arg[] and work our way back to the beginning of the array. + * + * As you can see, we save a lot of code by collapsing mungers that are + * prefixes or suffixes of each other. 
+ */ +#include + +ENTRY(munge_w) + movl 8(%esp),%ecx // get &uu_args + movl $0,4(%ecx) + ret + +ENTRY(munge_ww) + movl 8(%esp),%ecx // get &uu_args + xorl %edx,%edx + jmp Lw2 +ENTRY(munge_www) + movl 8(%esp),%ecx // get &uu_args + xorl %edx,%edx + jmp Lw3 +ENTRY(munge_wwww) + movl 8(%esp),%ecx // get &uu_args + xorl %edx,%edx + jmp Lw4 +ENTRY(munge_wwwww) + movl 8(%esp),%ecx // get &uu_args + xorl %edx,%edx + jmp Lw5 +ENTRY(munge_wwwwww) + movl 8(%esp),%ecx // get &uu_args + xorl %edx,%edx + jmp Lw6 +ENTRY(munge_wwwwwww) + movl 8(%esp),%ecx // get &uu_args + xorl %edx,%edx + jmp Lw7 +ENTRY(munge_wwwwwwww) + movl 8(%esp),%ecx // get &uu_args + xorl %edx,%edx + movl 28(%ecx),%eax + movl %eax,56(%ecx) + movl %edx,60(%ecx) +Lw7: + movl 24(%ecx),%eax + movl %eax,48(%ecx) + movl %edx,52(%ecx) +Lw6: + movl 20(%ecx),%eax + movl %eax,40(%ecx) + movl %edx,44(%ecx) +Lw5: + movl 16(%ecx),%eax + movl %eax,32(%ecx) + movl %edx,36(%ecx) +Lw4: + movl 12(%ecx),%eax + movl %eax,24(%ecx) + movl %edx,28(%ecx) +Lw3: + movl 8(%ecx),%eax + movl %eax,16(%ecx) + movl %edx,20(%ecx) +Lw2: + movl 4(%ecx),%eax + movl %eax,8(%ecx) + movl %edx,12(%ecx) + movl %edx,4(%ecx) + ret + + +Entry(munge_wl) /* Costs an extra w move to do this */ +ENTRY(munge_wlw) + movl 8(%esp),%ecx // get &uu_args + xorl %edx,%edx + movl 12(%ecx),%eax + movl %eax,16(%ecx) + movl %edx,20(%ecx) + movl 8(%ecx),%eax + movl %eax,12(%ecx) + movl 4(%ecx),%eax + movl %eax,8(%ecx) + movl %edx,4(%ecx) + ret + +ENTRY(munge_wwwl) + movl 8(%esp),%ecx // get &uu_args + xorl %edx,%edx + movl 12(%ecx),%eax + movl %eax,24(%ecx) + movl 16(%ecx),%eax + movl %eax,28(%ecx) + jmp Lw3 + +ENTRY(munge_wwwwl) + movl 8(%esp),%ecx // get &uu_args + xorl %edx,%edx + movl 16(%ecx),%eax + movl %eax,32(%ecx) + movl 20(%ecx),%eax + movl %eax,36(%ecx) + jmp Lw4 + +ENTRY(munge_wwwwwl) + movl 8(%esp),%ecx // get &uu_args + xorl %edx,%edx + movl 20(%ecx),%eax + movl %eax,40(%ecx) + movl 24(%ecx),%eax + movl %eax,44(%ecx) + jmp Lw5 + +ENTRY(munge_wsw) + 
movl 8(%esp),%ecx // get &uu_args + movl 8(%ecx),%eax + movl %eax,16(%ecx) + movl $0,20(%ecx) + movl 4(%ecx),%eax + cltd + movl %eax,8(%ecx) + movl %edx,12(%ecx) + movl $0,4(%ecx) + ret + +ENTRY(munge_wws) + movl 8(%esp),%ecx // get &uu_args + movl 8(%ecx),%eax + cltd + movl %eax,16(%ecx) + movl %edx,20(%ecx) + xorl %edx,%edx + jmp Lw2 + +ENTRY(munge_wwwsw) + movl 8(%esp),%ecx // get &uu_args + movl 16(%ecx),%eax + movl %eax,32(%ecx) + movl $0,36(%ecx) + movl 12(%ecx),%eax + cltd + movl %eax,24(%ecx) + movl %edx,28(%ecx) + xorl %edx,%edx + jmp Lw3 diff --git a/bsd/dev/i386/pio.h b/bsd/dev/i386/pio.h deleted file mode 100644 index 66992563d..000000000 --- a/bsd/dev/i386/pio.h +++ /dev/null @@ -1,242 +0,0 @@ -/* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. - * - * @APPLE_LICENSE_HEADER_START@ - * - * The contents of this file constitute Original Code as defined in and - * are subject to the Apple Public Source License Version 1.1 (the - * "License"). You may not use this file except in compliance with the - * License. Please obtain a copy of the License at - * http://www.apple.com/publicsource and read it before using this file. - * - * This Original Code and all software distributed under the License are - * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the - * License for the specific language governing rights and limitations - * under the License. 
- * - * @APPLE_LICENSE_HEADER_END@ - */ -/* - * @OSF_COPYRIGHT@ - */ -/* - * HISTORY - * - * Revision 1.2 1998/09/30 21:20:45 wsanchez - * Merged in IntelMerge1 (mburg: Intel support) - * - * Revision 1.1.2.1 1998/09/30 18:18:50 mburg - * Changes for Intel port - * - * Revision 1.1.1.1 1998/03/07 02:25:38 wsanchez - * Import of OSF Mach kernel (~mburg) - * - * Revision 1.1.8.2 1996/07/31 09:46:36 paire - * Merged with nmk20b7_shared (1.1.11.2 -> 1.1.11.1) - * [96/06/10 paire] - * - * Revision 1.1.11.2 1996/06/13 12:38:25 bernadat - * Do not use inline macros when MACH_ASSERT is configured. - * [96/05/24 bernadat] - * - * Revision 1.1.11.1 1996/05/14 13:50:23 paire - * Added new linl and loutl __inline__. - * Added conditional compilation for [l]{in|oub}[bwl]() __inline__. - * [95/11/24 paire] - * - * Revision 1.1.8.1 1994/09/23 02:00:28 ezf - * change marker to not FREE - * [1994/09/22 21:25:52 ezf] - * - * Revision 1.1.4.5 1993/08/09 19:40:41 dswartz - * Add ANSI prototypes - CR#9523 - * [1993/08/06 17:45:57 dswartz] - * - * Revision 1.1.4.4 1993/06/11 15:17:37 jeffc - * CR9176 - ANSI C violations: inb/outb macros must be changed from - * ({ ... }) to inline functions, with proper type definitions. Callers - * must pass proper types to these functions: 386 I/O port addresses - * are unsigned shorts (not pointers). - * [1993/06/10 14:26:10 jeffc] - * - * Revision 1.1.4.3 1993/06/07 22:09:28 jeffc - * CR9176 - ANSI C violations: trailing tokens on CPP - * directives, extra semicolons after decl_ ..., asm keywords - * [1993/06/07 19:00:26 jeffc] - * - * Revision 1.1.4.2 1993/06/04 15:28:45 jeffc - * CR9176 - ANSI problems - - * Added casts to get macros to take caddr_t as an I/O space address. 
- * [1993/06/04 13:45:55 jeffc] - * - * Revision 1.1 1992/09/30 02:25:51 robert - * Initial revision - * - * $EndLog$ - */ -/* CMU_HIST */ -/* - * Revision 2.5 91/05/14 16:14:20 mrt - * Correcting copyright - * - * Revision 2.4 91/02/05 17:13:56 mrt - * Changed to new Mach copyright - * [91/02/01 17:37:08 mrt] - * - * Revision 2.3 90/12/20 16:36:37 jeffreyh - * changes for __STDC__ - * [90/12/07 jeffreyh] - * - * Revision 2.2 90/11/26 14:48:41 rvb - * Pulled from 2.5 - * [90/11/22 10:09:38 rvb] - * - * [90/08/14 mg32] - * - * Now we know how types are factor in. - * Cleaned up a bunch: eliminated ({ for output and flushed unused - * output variables. - * [90/08/14 rvb] - * - * This is how its done in gcc: - * Created. - * [90/03/26 rvb] - * - */ -/* CMU_ENDHIST */ -/* - * Mach Operating System - * Copyright (c) 1991,1990 Carnegie Mellon University - * All Rights Reserved. - * - * Permission to use, copy, modify and distribute this software and its - * documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR - * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie Mellon - * the rights to redistribute these changes. 
- */ -/* - */ -#ifndef I386_PIO_H -#define I386_PIO_H - -typedef unsigned short i386_ioport_t; - -/* read a longword */ -extern unsigned long inl( - i386_ioport_t port); -/* read a shortword */ -extern unsigned short inw( - i386_ioport_t port); -/* read a byte */ -extern unsigned char inb( - i386_ioport_t port); -/* write a longword */ -extern void outl( - i386_ioport_t port, - unsigned long datum); -/* write a word */ -extern void outw( - i386_ioport_t port, - unsigned short datum); -/* write a longword */ -extern void outb( - i386_ioport_t port, - unsigned char datum); - -/* input an array of longwords */ -extern void linl( - i386_ioport_t port, - int * data, - int count); -/* output an array of longwords */ -extern void loutl( - i386_ioport_t port, - int * data, - int count); - -/* input an array of words */ -extern void linw( - i386_ioport_t port, - int * data, - int count); -/* output an array of words */ -extern void loutw( - i386_ioport_t port, - int * data, - int count); - -/* input an array of bytes */ -extern void linb( - i386_ioport_t port, - char * data, - int count); -/* output an array of bytes */ -extern void loutb( - i386_ioport_t port, - char * data, - int count); - -extern __inline__ unsigned long inl( - i386_ioport_t port) -{ - unsigned long datum; - __asm__ volatile("inl %1, %0" : "=a" (datum) : "d" (port)); - return(datum); -} - -extern __inline__ unsigned short inw( - i386_ioport_t port) -{ - unsigned short datum; - __asm__ volatile(".byte 0x66; inl %1, %0" : "=a" (datum) : "d" (port)); - return(datum); -} - -extern __inline__ unsigned char inb( - i386_ioport_t port) -{ - unsigned char datum; - __asm__ volatile("inb %1, %0" : "=a" (datum) : "d" (port)); - return(datum); -} - -extern __inline__ void outl( - i386_ioport_t port, - unsigned long datum) -{ - __asm__ volatile("outl %0, %1" : : "a" (datum), "d" (port)); -} - -extern __inline__ void outw( - i386_ioport_t port, - unsigned short datum) -{ - __asm__ volatile(".byte 0x66; outl %0, %1" : : 
"a" (datum), "d" (port)); -} - -extern __inline__ void outb( - i386_ioport_t port, - unsigned char datum) -{ - __asm__ volatile("outb %0, %1" : : "a" (datum), "d" (port)); -} - -#endif /* I386_PIO_H */ diff --git a/bsd/dev/i386/stubs.c b/bsd/dev/i386/stubs.c index 5a15de0ea..d98a823d6 100644 --- a/bsd/dev/i386/stubs.c +++ b/bsd/dev/i386/stubs.c @@ -37,12 +37,13 @@ #include #include #include +#include /* XXX should be elsewhere (cpeak) */ -extern int set_bsduthreadargs(thread_t, void *, void *); +extern struct proc *i386_current_proc(void); extern void *get_bsduthreadarg(thread_t); extern int *get_bsduthreadrval(thread_t); -extern int *get_bsduthreadlowpridelay(thread_t); +extern void *find_user_regs(thread_t); /* * copy a null terminated string from the kernel address space into @@ -111,56 +112,62 @@ copywithin(void *src, void *dst, size_t count) return 0; } -int -set_bsduthreadargs(thread_t th, void * pcb, __unused void *ignored_arg) -{ +/* + * This is just current_proc() from bsd/kern/bsd_stubs.c, but instead of + * returning kernproc in the non-vfork() case, it can return NULL. This is + * needed because the system call entry point is in osfmk/i386/bsd_i386.c + * instead of bsd/dev/i386, and therefore cannot see some BSD thread + * internals. We need to distinguish kernproc defaulting in the vfork and + * non-vfork cases vs. actually being the real process context. + */ +struct proc * +i386_current_proc(void) +{ struct uthread * ut; - struct proc *p = current_proc(); - - ut = get_bsdthread_info(th); - ut->uu_ar0 = (int *)pcb; - - /* - * Delayed binding of thread credential to process credential. - * - * XXX This doesn't really belong here, but the i386 code has a - * XXX number of seemingly gratuitous structural differences that - * XXX make this the most appropriate place to do the work. 
- */ - if (ut->uu_ucred != p->p_ucred && - (ut->uu_flag & UT_SETUID) == 0) { - kauth_cred_t old = ut->uu_ucred; - proc_lock(p); - ut->uu_ucred = p->p_ucred; - kauth_cred_ref(ut->uu_ucred); - proc_unlock(p); - if (old != NOCRED) - kauth_cred_rele(old); + struct proc *p; + thread_t thr_act = current_thread(); + + ut = (struct uthread *)get_bsdthread_info(thr_act); + if (ut && (ut->uu_flag & UT_VFORK)) { + if (ut->uu_proc) { + p = ut->uu_proc; + if ((p->p_flag & P_INVFORK) == 0) + panic("returning child proc not under vfork"); + if (p->p_vforkact != (void *)thr_act) + panic("returning child proc which is not cur_act"); + return(p); + } else { + return (kernproc); + } } - return(1); + /* Not in vfork - may return NULL */ + p = (struct proc *)get_bsdtask_info(current_task()); + + return (p); } void * get_bsduthreadarg(thread_t th) { -struct uthread *ut; + void *arg_ptr; + struct uthread *ut; + ut = get_bsdthread_info(th); - return((void *)(ut->uu_arg)); + + if (ml_thread_is64bit(th) == TRUE) + arg_ptr = (void *)saved_state64(find_user_regs(th)); + else + arg_ptr = (void *)(ut->uu_arg); + + return(arg_ptr); } int * get_bsduthreadrval(thread_t th) { -struct uthread *ut; - ut = get_bsdthread_info(th); - return(&ut->uu_rval[0]); -} + struct uthread *ut; -int * -get_bsduthreadlowpridelay(thread_t th) -{ -struct uthread *ut; ut = get_bsdthread_info(th); - return(&ut->uu_lowpri_delay); + return(&ut->uu_rval[0]); } diff --git a/bsd/dev/i386/sysctl.c b/bsd/dev/i386/sysctl.c index e3fe2fac6..1cb9f08d7 100644 --- a/bsd/dev/i386/sysctl.c +++ b/bsd/dev/i386/sysctl.c @@ -65,6 +65,20 @@ hw_cpu_features SYSCTL_HANDLER_ARGS return SYSCTL_OUT(req, buf, strlen(buf) + 1); } +static int +hw_cpu_extfeatures SYSCTL_HANDLER_ARGS +{ + __unused struct sysctl_oid *unused_oidp = oidp; + __unused void *unused_arg1 = arg1; + __unused int unused_arg2 = arg2; + char buf[256]; + + buf[0] = '\0'; + cpuid_get_extfeature_names(cpuid_extfeatures(), buf, sizeof(buf)); + + return SYSCTL_OUT(req, buf, 
strlen(buf) + 1); +} + SYSCTL_NODE(_machdep, OID_AUTO, cpu, CTLFLAG_RW, 0, "CPU info"); @@ -80,10 +94,6 @@ SYSCTL_PROC(_machdep_cpu, OID_AUTO, model_string, CTLTYPE_STRING | CTLFLAG_RD, (void *)offsetof(i386_cpu_info_t, cpuid_model_string), -1, hw_cpu_sysctl, "A", "CPU model string"); -SYSCTL_PROC(_machdep_cpu, OID_AUTO, value, CTLTYPE_INT | CTLFLAG_RD, - (void *)offsetof(i386_cpu_info_t, cpuid_value), sizeof(uint32_t), - hw_cpu_sysctl, "I", "CPU value"); - SYSCTL_PROC(_machdep_cpu, OID_AUTO, family, CTLTYPE_INT | CTLFLAG_RD, (void *)offsetof(i386_cpu_info_t, cpuid_family), sizeof(uint8_t), hw_cpu_sysctl, "I", "CPU family"); @@ -104,10 +114,14 @@ SYSCTL_PROC(_machdep_cpu, OID_AUTO, stepping, CTLTYPE_INT | CTLFLAG_RD, (void *)offsetof(i386_cpu_info_t, cpuid_stepping), sizeof(uint8_t), hw_cpu_sysctl, "I", "CPU stepping"); -SYSCTL_PROC(_machdep_cpu, OID_AUTO, feature_bits, CTLTYPE_INT | CTLFLAG_RD, - (void *)offsetof(i386_cpu_info_t, cpuid_features), sizeof(uint32_t), +SYSCTL_PROC(_machdep_cpu, OID_AUTO, feature_bits, CTLTYPE_QUAD | CTLFLAG_RD, + (void *)offsetof(i386_cpu_info_t, cpuid_features), sizeof(uint64_t), hw_cpu_sysctl, "I", "CPU features"); +SYSCTL_PROC(_machdep_cpu, OID_AUTO, extfeature_bits, CTLTYPE_QUAD | CTLFLAG_RD, + (void *)offsetof(i386_cpu_info_t, cpuid_extfeatures), sizeof(uint64_t), + hw_cpu_sysctl, "I", "CPU extended features"); + SYSCTL_PROC(_machdep_cpu, OID_AUTO, signature, CTLTYPE_INT | CTLFLAG_RD, (void *)offsetof(i386_cpu_info_t, cpuid_signature), sizeof(uint32_t), hw_cpu_sysctl, "I", "CPU signature"); @@ -120,6 +134,22 @@ SYSCTL_PROC(_machdep_cpu, OID_AUTO, features, CTLTYPE_STRING | CTLFLAG_RD, 0, 0, hw_cpu_features, "A", "CPU feature names"); +SYSCTL_PROC(_machdep_cpu, OID_AUTO, extfeatures, CTLTYPE_STRING | CTLFLAG_RD, + 0, 0, + hw_cpu_extfeatures, "A", "CPU extended feature names"); + +SYSCTL_PROC(_machdep_cpu, OID_AUTO, logical_per_package, + CTLTYPE_INT | CTLFLAG_RD, + (void *)offsetof(i386_cpu_info_t, cpuid_logical_per_package), + 
sizeof(uint32_t), + hw_cpu_sysctl, "I", "CPU logical cpus per package"); + +SYSCTL_PROC(_machdep_cpu, OID_AUTO, cores_per_package, + CTLTYPE_INT | CTLFLAG_RD, + (void *)offsetof(i386_cpu_info_t, cpuid_cores_per_package), + sizeof(uint32_t), + hw_cpu_sysctl, "I", "CPU cores per package"); + struct sysctl_oid *machdep_sysctl_list[] = { @@ -127,16 +157,19 @@ struct sysctl_oid *machdep_sysctl_list[] = &sysctl__machdep_cpu_vendor, &sysctl__machdep_cpu_brand_string, &sysctl__machdep_cpu_model_string, - &sysctl__machdep_cpu_value, &sysctl__machdep_cpu_family, &sysctl__machdep_cpu_model, &sysctl__machdep_cpu_extmodel, &sysctl__machdep_cpu_extfamily, &sysctl__machdep_cpu_feature_bits, + &sysctl__machdep_cpu_extfeature_bits, &sysctl__machdep_cpu_stepping, &sysctl__machdep_cpu_signature, &sysctl__machdep_cpu_brand, &sysctl__machdep_cpu_features, + &sysctl__machdep_cpu_extfeatures, + &sysctl__machdep_cpu_logical_per_package, + &sysctl__machdep_cpu_cores_per_package, (struct sysctl_oid *) 0 }; diff --git a/bsd/dev/i386/systemcalls.c b/bsd/dev/i386/systemcalls.c new file mode 100644 index 000000000..05c0b70b0 --- /dev/null +++ b/bsd/dev/i386/systemcalls.c @@ -0,0 +1,616 @@ +/* + * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * The contents of this file constitute Original Code as defined in and + * are subject to the Apple Public Source License Version 1.1 (the + * "License"). You may not use this file except in compliance with the + * License. Please obtain a copy of the License at + * http://www.apple.com/publicsource and read it before using this file. + * + * This Original Code and all software distributed under the License are + * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. 
Please see the + * License for the specific language governing rights and limitations + * under the License. + * + * @APPLE_LICENSE_HEADER_END@ + */ +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include +#include +#include + +extern void unix_syscall(x86_saved_state_t *); +extern void unix_syscall64(x86_saved_state_t *); +extern void unix_syscall_return(int); +extern void *find_user_regs(thread_t); +extern void IOSleep(int); +extern void exit_funnel_section(void); + +extern void Debugger(const char * message); + +/* + * Function: unix_syscall + * + * Inputs: regs - pointer to i386 save area + * + * Outputs: none + */ +void +unix_syscall(x86_saved_state_t *state) +{ + thread_t thread; + void *vt; + unsigned short code; + struct sysent *callp; + int nargs; + int error; + int funnel_type; + vm_offset_t params; + struct proc *p; + struct uthread *uthread; + unsigned int cancel_enable; + x86_saved_state32_t *regs; + + assert(is_saved_state32(state)); + regs = saved_state32(state); + + if (regs->eax == 0x800) + thread_exception_return(); + + thread = current_thread(); + uthread = get_bsdthread_info(thread); + + /* Get the approriate proc; may be different from task's for vfork() */ + if (!(uthread->uu_flag & UT_VFORK)) + p = (struct proc *)get_bsdtask_info(current_task()); + else + p = current_proc(); + + /* Verify that we are not being called from a task without a proc */ + if (p == NULL) { + regs->eax = EPERM; + regs->efl |= EFL_CF; + task_terminate_internal(current_task()); + thread_exception_return(); + /* NOTREACHED */ + } + + //printf("[scall : eax %x]", regs->eax); + code = regs->eax; + params = (vm_offset_t) ((caddr_t)regs->uesp + sizeof (int)); + callp = (code >= nsysent) ? 
&sysent[63] : &sysent[code]; + + if (callp == sysent) { + code = fuword(params); + params += sizeof (int); + callp = (code >= nsysent) ? &sysent[63] : &sysent[code]; + } + vt = (void *)uthread->uu_arg; + + nargs = callp->sy_narg * sizeof (syscall_arg_t); + if (nargs != 0) { + sy_munge_t *mungerp; + + assert(nargs <= 8); + + error = copyin((user_addr_t) params, (char *) vt, nargs); + if (error) { + regs->eax = error; + regs->efl |= EFL_CF; + thread_exception_return(); + /* NOTREACHED */ + } + if (code != 180) { + int *ip = (int *)vt; + + KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_START, + *ip, *(ip+1), *(ip+2), *(ip+3), 0); + } + mungerp = callp->sy_arg_munge32; + + /* + * If non-NULL, then call the syscall argument munger to + * copy in arguments (see xnu/bsd/dev/i386/munge.s); the + * first argument is NULL because we are munging in place + * after a copyin because the ABI currently doesn't use + * registers to pass system call arguments. + */ + if (mungerp != NULL) + (*mungerp)(NULL, vt); + } else + KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_START, + 0, 0, 0, 0, 0); + /* + * Delayed binding of thread credential to process credential, if we + * are not running with an explicitly set thread credential. 
+ */ + if (uthread->uu_ucred != p->p_ucred && + (uthread->uu_flag & UT_SETUID) == 0) { + kauth_cred_t old = uthread->uu_ucred; + proc_lock(p); + uthread->uu_ucred = p->p_ucred; + kauth_cred_ref(uthread->uu_ucred); + proc_unlock(p); + if (old != NOCRED) + kauth_cred_rele(old); + } + + uthread->uu_rval[0] = 0; + uthread->uu_rval[1] = regs->edx; + + cancel_enable = callp->sy_cancel; + + if (cancel_enable == _SYSCALL_CANCEL_NONE) { + uthread->uu_flag |= UT_NOTCANCELPT; + } else { + if ((uthread->uu_flag & (UT_CANCELDISABLE | UT_CANCEL | UT_CANCELED)) == UT_CANCEL) { + if (cancel_enable == _SYSCALL_CANCEL_PRE) { + /* system call cancelled; return to handle cancellation */ + regs->eax = (long long)EINTR; + regs->efl |= EFL_CF; + thread_exception_return(); + /* NOTREACHED */ + } else { + thread_abort_safely(thread); + } + } + } + + funnel_type = (callp->sy_funnel & FUNNEL_MASK); + if (funnel_type == KERNEL_FUNNEL) + thread_funnel_set(kernel_flock, TRUE); + + if (KTRPOINT(p, KTR_SYSCALL)) + ktrsyscall(p, code, callp->sy_narg, vt); + + AUDIT_SYSCALL_ENTER(code, p, uthread); + error = (*(callp->sy_call))((void *) p, (void *) vt, &(uthread->uu_rval[0])); + AUDIT_SYSCALL_EXIT(error, p, uthread); + + if (error == ERESTART) { + /* + * Move the user's pc back to repeat the syscall: + * 5 bytes for a sysenter, or 2 for an int 8x. 
+ * The SYSENTER_TF_CS covers single-stepping over a sysenter + * - see debug trap handler in idt.s/idt64.s + */ + if (regs->cs == SYSENTER_CS || regs->cs == SYSENTER_TF_CS) + regs->eip -= 5; + else + regs->eip -= 2; + } + else if (error != EJUSTRETURN) { + if (error) { + regs->eax = error; + regs->efl |= EFL_CF; /* carry bit */ + } else { /* (not error) */ + regs->eax = uthread->uu_rval[0]; + regs->edx = uthread->uu_rval[1]; + regs->efl &= ~EFL_CF; + } + } + + if (KTRPOINT(p, KTR_SYSRET)) + ktrsysret(p, code, error, uthread->uu_rval[0]); + + if (cancel_enable == _SYSCALL_CANCEL_NONE) + uthread->uu_flag &= ~UT_NOTCANCELPT; + + /* + * if we're holding the funnel + * than drop it regardless of whether + * we took it on system call entry + */ + exit_funnel_section(); + + if (uthread->uu_lowpri_delay) { + /* + * task is marked as a low priority I/O type + * and the I/O we issued while in this system call + * collided with normal I/O operations... we'll + * delay in order to mitigate the impact of this + * task on the normal operation of the system + */ + IOSleep(uthread->uu_lowpri_delay); + uthread->uu_lowpri_delay = 0; + } + if (code != 180) + KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_END, + error, uthread->uu_rval[0], uthread->uu_rval[1], 0, 0); + + thread_exception_return(); + /* NOTREACHED */ +} + + +void +unix_syscall64(x86_saved_state_t *state) +{ + thread_t thread; + unsigned short code; + struct sysent *callp; + void *uargp; + int args_in_regs; + int error; + int funnel_type; + struct proc *p; + struct uthread *uthread; + unsigned int cancel_enable; + x86_saved_state64_t *regs; + + assert(is_saved_state64(state)); + regs = saved_state64(state); + + if (regs->rax == 0x2000800) + thread_exception_return(); + + thread = current_thread(); + uthread = get_bsdthread_info(thread); + + /* Get the approriate proc; may be different from task's for vfork() */ + if (!(uthread->uu_flag & UT_VFORK)) + p = (struct proc 
*)get_bsdtask_info(current_task()); + else + p = current_proc(); + + /* Verify that we are not being called from a task without a proc */ + if (p == NULL) { + regs->rax = EPERM; + regs->isf.rflags |= EFL_CF; + task_terminate_internal(current_task()); + thread_exception_return(); + /* NOTREACHED */ + } + args_in_regs = 6; + + code = regs->rax & SYSCALL_NUMBER_MASK; + callp = (code >= nsysent) ? &sysent[63] : &sysent[code]; + uargp = (void *)(&regs->rdi); + + if (callp == sysent) { + /* + * indirect system call... system call number + * passed as 'arg0' + */ + code = regs->rdi; + callp = (code >= nsysent) ? &sysent[63] : &sysent[code]; + uargp = (void *)(&regs->rsi); + args_in_regs = 5; + } + + if (callp->sy_narg != 0) { + if (code != 180) { + uint64_t *ip = (uint64_t *)uargp; + + KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_START, + (int)(*ip), (int)(*(ip+1)), (int)(*(ip+2)), (int)(*(ip+3)), 0); + } + assert(callp->sy_narg <= 8); + + if (callp->sy_narg > args_in_regs) { + int copyin_count; + + copyin_count = (callp->sy_narg - args_in_regs) * sizeof(uint64_t); + + error = copyin((user_addr_t)(regs->isf.rsp + sizeof(user_addr_t)), (char *)&regs->v_arg6, copyin_count); + if (error) { + regs->rax = error; + regs->isf.rflags |= EFL_CF; + thread_exception_return(); + /* NOTREACHED */ + } + } + /* + * XXX Turn 64 bit unsafe calls into nosys() + */ + if (callp->sy_funnel & UNSAFE_64BIT) { + callp = &sysent[63]; + goto unsafe; + } + + } else + KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_START, + 0, 0, 0, 0, 0); +unsafe: + + /* + * Delayed binding of thread credential to process credential, if we + * are not running with an explicitly set thread credential.
+ */ + if (uthread->uu_ucred != p->p_ucred && + (uthread->uu_flag & UT_SETUID) == 0) { + kauth_cred_t old = uthread->uu_ucred; + proc_lock(p); + uthread->uu_ucred = p->p_ucred; + kauth_cred_ref(uthread->uu_ucred); + proc_unlock(p); + if (old != NOCRED) + kauth_cred_rele(old); + } + + uthread->uu_rval[0] = 0; + uthread->uu_rval[1] = 0; + + cancel_enable = callp->sy_cancel; + + if (cancel_enable == _SYSCALL_CANCEL_NONE) { + uthread->uu_flag |= UT_NOTCANCELPT; + } else { + if ((uthread->uu_flag & (UT_CANCELDISABLE | UT_CANCEL | UT_CANCELED)) == UT_CANCEL) { + if (cancel_enable == _SYSCALL_CANCEL_PRE) { + /* system call cancelled; return to handle cancellation */ + regs->rax = EINTR; + regs->isf.rflags |= EFL_CF; + thread_exception_return(); + /* NOTREACHED */ + } else { + thread_abort_safely(thread); + } + } + } + + funnel_type = (callp->sy_funnel & FUNNEL_MASK); + if (funnel_type == KERNEL_FUNNEL) + thread_funnel_set(kernel_flock, TRUE); + + if (KTRPOINT(p, KTR_SYSCALL)) + ktrsyscall(p, code, callp->sy_narg, uargp); + + AUDIT_SYSCALL_ENTER(code, p, uthread); + error = (*(callp->sy_call))((void *) p, uargp, &(uthread->uu_rval[0])); + AUDIT_SYSCALL_EXIT(error, p, uthread); + + if (error == ERESTART) { + /* + * all system calls come through via the syscall instruction + * in 64 bit mode... 
its 2 bytes in length + * move the user's pc back to repeat the syscall: + */ + regs->isf.rip -= 2; + } + else if (error != EJUSTRETURN) { + if (error) { + regs->rax = error; + regs->isf.rflags |= EFL_CF; /* carry bit */ + } else { /* (not error) */ + + switch (callp->sy_return_type) { + case _SYSCALL_RET_INT_T: + regs->rax = uthread->uu_rval[0]; + regs->rdx = uthread->uu_rval[1]; + break; + case _SYSCALL_RET_UINT_T: + regs->rax = ((u_int)uthread->uu_rval[0]); + regs->rdx = ((u_int)uthread->uu_rval[1]); + break; + case _SYSCALL_RET_OFF_T: + case _SYSCALL_RET_ADDR_T: + case _SYSCALL_RET_SIZE_T: + case _SYSCALL_RET_SSIZE_T: + regs->rax = *((uint64_t *)(&uthread->uu_rval[0])); + regs->rdx = 0; + break; + case _SYSCALL_RET_NONE: + break; + default: + panic("unix_syscall: unknown return type"); + break; + } + regs->isf.rflags &= ~EFL_CF; + } + } + + if (KTRPOINT(p, KTR_SYSRET)) + ktrsysret(p, code, error, uthread->uu_rval[0]); + + if (cancel_enable == _SYSCALL_CANCEL_NONE) + uthread->uu_flag &= ~UT_NOTCANCELPT; + + /* + * if we're holding the funnel + * than drop it regardless of whether + * we took it on system call entry + */ + exit_funnel_section(); + + if (uthread->uu_lowpri_delay) { + /* + * task is marked as a low priority I/O type + * and the I/O we issued while in this system call + * collided with normal I/O operations... 
we'll + * delay in order to mitigate the impact of this + * task on the normal operation of the system + */ + IOSleep(uthread->uu_lowpri_delay); + uthread->uu_lowpri_delay = 0; + } + if (code != 180) + KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_END, + error, uthread->uu_rval[0], uthread->uu_rval[1], 0, 0); + + thread_exception_return(); + /* NOTREACHED */ +} + + +void +unix_syscall_return(int error) +{ + thread_t thread; + struct uthread *uthread; + struct proc *p; + unsigned short code; + vm_offset_t params; + struct sysent *callp; + unsigned int cancel_enable; + + thread = current_thread(); + uthread = get_bsdthread_info(thread); + + p = current_proc(); + + if (proc_is64bit(p)) { + x86_saved_state64_t *regs; + + regs = saved_state64(find_user_regs(thread)); + + /* reconstruct code for tracing before blasting rax */ + code = regs->rax & SYSCALL_NUMBER_MASK; + callp = (code >= nsysent) ? &sysent[63] : &sysent[code]; + + if (callp == sysent) + /* + * indirect system call... system call number + * passed as 'arg0' + */ + code = regs->rdi; + + if (error == ERESTART) { + /* + * all system calls come through via the syscall instruction + * in 64 bit mode... 
its 2 bytes in length + * move the user's pc back to repeat the syscall: + */ + regs->isf.rip -= 2; + } + else if (error != EJUSTRETURN) { + if (error) { + regs->rax = error; + regs->isf.rflags |= EFL_CF; /* carry bit */ + } else { /* (not error) */ + + switch (callp->sy_return_type) { + case _SYSCALL_RET_INT_T: + regs->rax = uthread->uu_rval[0]; + regs->rdx = uthread->uu_rval[1]; + break; + case _SYSCALL_RET_UINT_T: + regs->rax = ((u_int)uthread->uu_rval[0]); + regs->rdx = ((u_int)uthread->uu_rval[1]); + break; + case _SYSCALL_RET_OFF_T: + case _SYSCALL_RET_ADDR_T: + case _SYSCALL_RET_SIZE_T: + case _SYSCALL_RET_SSIZE_T: + regs->rax = *((uint64_t *)(&uthread->uu_rval[0])); + regs->rdx = 0; + break; + case _SYSCALL_RET_NONE: + break; + default: + panic("unix_syscall: unknown return type"); + break; + } + regs->isf.rflags &= ~EFL_CF; + } + } + } else { + x86_saved_state32_t *regs; + + regs = saved_state32(find_user_regs(thread)); + + /* reconstruct code for tracing before blasting eax */ + code = regs->eax; + callp = (code >= nsysent) ? &sysent[63] : &sysent[code]; + + if (callp == sysent) { + params = (vm_offset_t) ((caddr_t)regs->uesp + sizeof (int)); + code = fuword(params); + } + if (error == ERESTART) { + regs->eip -= ((regs->cs & 0xffff) == SYSENTER_CS) ? 
5 : 2; + } + else if (error != EJUSTRETURN) { + if (error) { + regs->eax = error; + regs->efl |= EFL_CF; /* carry bit */ + } else { /* (not error) */ + regs->eax = uthread->uu_rval[0]; + regs->edx = uthread->uu_rval[1]; + regs->efl &= ~EFL_CF; + } + } + } + if (KTRPOINT(p, KTR_SYSRET)) + ktrsysret(p, code, error, uthread->uu_rval[0]); + + cancel_enable = callp->sy_cancel; + + if (cancel_enable == _SYSCALL_CANCEL_NONE) + uthread->uu_flag &= ~UT_NOTCANCELPT; + + /* + * if we're holding the funnel + * than drop it regardless of whether + * we took it on system call entry + */ + exit_funnel_section(); + + if (uthread->uu_lowpri_delay) { + /* + * task is marked as a low priority I/O type + * and the I/O we issued while in this system call + * collided with normal I/O operations... we'll + * delay in order to mitigate the impact of this + * task on the normal operation of the system + */ + IOSleep(uthread->uu_lowpri_delay); + uthread->uu_lowpri_delay = 0; + } + if (code != 180) + KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_END, + error, uthread->uu_rval[0], uthread->uu_rval[1], 0, 0); + + thread_exception_return(); + /* NOTREACHED */ +} + +void +munge_wwwlww( + __unused const void *in32, + void *out64) +{ + uint32_t *arg32; + uint64_t *arg64; + + /* we convert in place in out64 */ + arg32 = (uint32_t *) out64; + arg64 = (uint64_t *) out64; + + arg64[5] = arg32[6]; /* wwwlwW */ + arg64[4] = arg32[5]; /* wwwlWw */ + arg32[7] = arg32[4]; /* wwwLww (hi) */ + arg32[6] = arg32[3]; /* wwwLww (lo) */ + arg64[2] = arg32[2]; /* wwWlww */ + arg64[1] = arg32[1]; /* wWwlww */ + arg64[0] = arg32[0]; /* Wwwlww */ +} diff --git a/bsd/dev/i386/unix_signal.c b/bsd/dev/i386/unix_signal.c index ca3b9f2df..8c6305d52 100644 --- a/bsd/dev/i386/unix_signal.c +++ b/bsd/dev/i386/unix_signal.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2006 Apple Computer, Inc. All rights reserved. 
* * @APPLE_LICENSE_HEADER_START@ * @@ -38,28 +38,35 @@ #include #include #include +#include +#include #include /* for thread_abort_safely */ +#include +#include +#include #include #include -#include - -extern struct i386_saved_state *get_user_regs(thread_t); - -extern boolean_t valid_user_segment_selectors(uint16_t cs, - uint16_t ss, - uint16_t ds, - uint16_t es, - uint16_t fs, - uint16_t gs); +#include /* Forward: */ extern boolean_t machine_exception(int, int, int, int *, int *); +extern kern_return_t thread_getstatus(register thread_t act, int flavor, + thread_state_t tstate, mach_msg_type_number_t *count); +extern kern_return_t thread_setstatus(thread_t thread, int flavor, + thread_state_t tstate, mach_msg_type_number_t count); /* Signal handler flavors supported */ /* These defns should match the Libc implmn */ #define UC_TRAD 1 +#define UC_FLAVOR 30 + +#define C_32_STK_ALIGN 16 +#define C_64_STK_ALIGN 16 +#define C_64_REDZONE_LEN 128 +#define TRUNC_DOWN32(a,c) ((((uint32_t)a)-(c)) & ((uint32_t)(-(c)))) +#define TRUNC_DOWN64(a,c) ((((uint64_t)a)-(c)) & ((uint64_t)(-(c)))) /* * Send an interrupt to process. @@ -71,101 +78,408 @@ extern boolean_t machine_exception(int, int, int, int *, int *); * pointer, and the argument pointer, it returns * to the user specified pc, psl. 
*/ +struct sigframe32 { + int retaddr; + sig_t catcher; + int sigstyle; + int sig; + siginfo_t * sinfo; + struct ucontext * uctx; +}; + + + void -sendsig(p, catcher, sig, mask, code) - struct proc *p; - user_addr_t catcher; /* sig_t */ - int sig, mask; - u_long code; +sendsig(struct proc *p, user_addr_t ua_catcher, int sig, int mask, __unused u_long code) { - struct sigframe { - int retaddr; - sig_t catcher; - int sigstyle; - int sig; - int code; - struct sigcontext * scp; - } frame, *fp; - struct sigcontext context, *scp; + union { + struct mcontext32 mctx32; + struct mcontext64 mctx64; + } mctx; + user_addr_t ua_sp; + user_addr_t ua_fp; + user_addr_t ua_cr2; + user_addr_t ua_sip; + user_addr_t ua_uctxp; + user_addr_t ua_mctxp; + user_siginfo_t sinfo64; + struct sigacts *ps = p->p_sigacts; - int oonstack; + int oonstack, flavor; + void * state; + mach_msg_type_number_t state_count; + int uthsigaltstack = 0; + int altstack = 0; + thread_t thread = current_thread(); struct uthread * ut; - struct i386_saved_state * saved_state = get_user_regs(thread); - sig_t trampact; + int stack_size = 0; + int infostyle = UC_TRAD; + if (p->p_sigacts->ps_siginfo & sigmask(sig)) + infostyle = UC_FLAVOR; + ut = get_bsdthread_info(thread); - oonstack = ps->ps_sigstk.ss_flags & SA_ONSTACK; - if ((ps->ps_flags & SAS_ALTSTACK) && !oonstack && - (ps->ps_sigonstack & sigmask(sig))) { - scp = ((struct sigcontext *)ps->ps_sigstk.ss_sp) - 1; - ps->ps_sigstk.ss_flags |= SA_ONSTACK; - } else - scp = ((struct sigcontext *)saved_state->uesp) - 1; - fp = ((struct sigframe *)scp) - 1; - - /* - * Build the argument list for the signal handler. 
- */ - trampact = (sig_t)ps->ps_trampact[sig]; - /* Handler should call sigreturn to get out of it */ - frame.retaddr = 0xffffffff; - frame.catcher = CAST_DOWN(sig_t,catcher); /* XXX LP64 */ - frame.sigstyle = UC_TRAD; - frame.sig = sig; - - if (sig == SIGILL || sig == SIGFPE) { - frame.code = code; - } else - frame.code = 0; - frame.scp = scp; - if (copyout((caddr_t)&frame, (user_addr_t)fp, sizeof (frame))) - goto bad; + uthsigaltstack = p->p_lflag & P_LTHSIGSTACK; + if (uthsigaltstack != 0 ) { + oonstack = ut->uu_sigstk.ss_flags & SA_ONSTACK; + altstack = ut->uu_flag & UT_ALTSTACK; + } else { + oonstack = ps->ps_sigstk.ss_flags & SA_ONSTACK; + altstack = ps->ps_flags & SAS_ALTSTACK; + } /* - * Build the signal context to be used by sigreturn. + * init siginfo */ - context.sc_onstack = oonstack; - context.sc_mask = mask; - context.sc_eax = saved_state->eax; - context.sc_ebx = saved_state->ebx; - context.sc_ecx = saved_state->ecx; - context.sc_edx = saved_state->edx; - context.sc_edi = saved_state->edi; - context.sc_esi = saved_state->esi; - context.sc_ebp = saved_state->ebp; - context.sc_esp = saved_state->uesp; - context.sc_ss = saved_state->ss; - context.sc_eflags = saved_state->efl; - context.sc_eip = saved_state->eip; - context.sc_cs = saved_state->cs; - if (saved_state->efl & EFL_VM) { - context.sc_ds = saved_state->v86_segs.v86_ds; - context.sc_es = saved_state->v86_segs.v86_es; - context.sc_fs = saved_state->v86_segs.v86_fs; - context.sc_gs = saved_state->v86_segs.v86_gs; - - saved_state->efl &= ~EFL_VM; + bzero((caddr_t)&sinfo64, sizeof(user_siginfo_t)); + sinfo64.si_signo = sig; + + if (proc_is64bit(p)) { + x86_thread_state64_t *tstate64; + struct user_ucontext64 uctx64; + + flavor = x86_THREAD_STATE64; + state_count = x86_THREAD_STATE64_COUNT; + state = (void *)&mctx.mctx64.ss; + if (thread_getstatus(thread, flavor, (thread_state_t)state, &state_count) != KERN_SUCCESS) + goto bad; + + flavor = x86_FLOAT_STATE64; + state_count = x86_FLOAT_STATE64_COUNT; + 
state = (void *)&mctx.mctx64.fs; + if (thread_getstatus(thread, flavor, (thread_state_t)state, &state_count) != KERN_SUCCESS) + goto bad; + + flavor = x86_EXCEPTION_STATE64; + state_count = x86_EXCEPTION_STATE64_COUNT; + state = (void *)&mctx.mctx64.es; + if (thread_getstatus(thread, flavor, (thread_state_t)state, &state_count) != KERN_SUCCESS) + goto bad; + + tstate64 = &mctx.mctx64.ss; + + if (altstack && !oonstack && (ps->ps_sigonstack & sigmask(sig))) { + if (uthsigaltstack != 0) { + ua_sp = ut->uu_sigstk.ss_sp; + stack_size = ut->uu_sigstk.ss_size; + ua_sp += stack_size; + ut->uu_sigstk.ss_flags |= SA_ONSTACK; + } else { + ua_sp = ps->ps_sigstk.ss_sp; + stack_size = ps->ps_sigstk.ss_size; + ua_sp += stack_size; + ps->ps_sigstk.ss_flags |= SA_ONSTACK; + } + } else + ua_sp = tstate64->rsp; + ua_cr2 = mctx.mctx64.es.faultvaddr; + + /* The x86_64 ABI defines a 128-byte red zone. */ + ua_sp -= C_64_REDZONE_LEN; + + ua_sp -= sizeof (struct user_ucontext64); + ua_uctxp = ua_sp; // someone tramples the first word! + + ua_sp -= sizeof (user_siginfo_t); + ua_sip = ua_sp; + + ua_sp -= sizeof (struct mcontext64); + ua_mctxp = ua_sp; + + /* + * Align the frame and stack pointers to 16 bytes for SSE. + * (Note that we use 'ua_fp' as the base of the stack going forward) + */ + ua_fp = TRUNC_DOWN64(ua_sp, C_64_STK_ALIGN); + + /* + * But we need to account for the return address so the alignment is + * truly "correct" at _sigtramp + */ + ua_fp -= sizeof(user_addr_t); + + /* + * Build the signal context to be used by sigreturn. 
+ */ + bzero(&uctx64, sizeof(uctx64)); + + uctx64.uc_onstack = oonstack; + uctx64.uc_sigmask = mask; + uctx64.uc_stack.ss_sp = ua_fp; + uctx64.uc_stack.ss_size = stack_size; + + if (oonstack) + uctx64.uc_stack.ss_flags |= SS_ONSTACK; + uctx64.uc_link = 0; + + uctx64.uc_mcsize = sizeof(struct mcontext64); + uctx64.uc_mcontext64 = ua_mctxp; + + if (copyout((caddr_t)&uctx64, ua_uctxp, sizeof (uctx64))) + goto bad; + + if (copyout((caddr_t)&mctx.mctx64, ua_mctxp, sizeof (struct mcontext64))) + goto bad; + + sinfo64.pad[0] = tstate64->rsp; + sinfo64.si_addr = tstate64->rip; + + tstate64->rip = ps->ps_trampact[sig]; + tstate64->rsp = ua_fp; + tstate64->rflags = get_eflags_exportmask(); + /* + * JOE - might not need to set these + */ + tstate64->cs = USER64_CS; + tstate64->fs = NULL_SEG; + tstate64->gs = USER_CTHREAD; + + /* + * Build the argument list for the signal handler. + * Handler should call sigreturn to get out of it + */ + tstate64->rdi = ua_catcher; + tstate64->rsi = infostyle; + tstate64->rdx = sig; + tstate64->rcx = ua_sip; + tstate64->r8 = ua_uctxp; + } else { - context.sc_ds = saved_state->ds; - context.sc_es = saved_state->es; - context.sc_fs = saved_state->fs; - context.sc_gs = saved_state->gs; + x86_thread_state32_t *tstate32; + struct ucontext uctx32; + struct sigframe32 frame32; + + flavor = x86_THREAD_STATE32; + state_count = x86_THREAD_STATE32_COUNT; + state = (void *)&mctx.mctx32.ss; + if (thread_getstatus(thread, flavor, (thread_state_t)state, &state_count) != KERN_SUCCESS) + goto bad; + + flavor = x86_FLOAT_STATE32; + state_count = x86_FLOAT_STATE32_COUNT; + state = (void *)&mctx.mctx32.fs; + if (thread_getstatus(thread, flavor, (thread_state_t)state, &state_count) != KERN_SUCCESS) + goto bad; + + flavor = x86_EXCEPTION_STATE32; + state_count = x86_EXCEPTION_STATE32_COUNT; + state = (void *)&mctx.mctx32.es; + if (thread_getstatus(thread, flavor, (thread_state_t)state, &state_count) != KERN_SUCCESS) + goto bad; + + tstate32 = &mctx.mctx32.ss; + + 
if (altstack && !oonstack && (ps->ps_sigonstack & sigmask(sig))) { + if (uthsigaltstack != 0) { + ua_sp = ut->uu_sigstk.ss_sp; + stack_size = ut->uu_sigstk.ss_size; + ua_sp += stack_size; + ut->uu_sigstk.ss_flags |= SA_ONSTACK; + } else { + ua_sp = ps->ps_sigstk.ss_sp; + stack_size = ps->ps_sigstk.ss_size; + ua_sp += stack_size; + ps->ps_sigstk.ss_flags |= SA_ONSTACK; + } + } else + ua_sp = tstate32->esp; + ua_cr2 = mctx.mctx32.es.faultvaddr; + + ua_sp -= sizeof (struct ucontext); + ua_uctxp = ua_sp; // someone tramples the first word! + + ua_sp -= sizeof (siginfo_t); + ua_sip = ua_sp; + + ua_sp -= sizeof (struct mcontext32); + ua_mctxp = ua_sp; + + ua_sp -= sizeof (struct sigframe32); + ua_fp = ua_sp; + + /* + * Align the frame and stack pointers to 16 bytes for SSE. + * (Note that we use 'fp' as the base of the stack going forward) + */ + ua_fp = TRUNC_DOWN32(ua_fp, C_32_STK_ALIGN); + + /* + * But we need to account for the return address so the alignment is + * truly "correct" at _sigtramp + */ + ua_fp -= sizeof(frame32.retaddr); + + /* + * Build the argument list for the signal handler. + * Handler should call sigreturn to get out of it + */ + frame32.retaddr = -1; + frame32.sigstyle = infostyle; + frame32.sig = sig; + frame32.catcher = CAST_DOWN(sig_t, ua_catcher); + frame32.sinfo = CAST_DOWN(siginfo_t *, ua_sip); + frame32.uctx = CAST_DOWN(struct ucontext *, ua_uctxp); + + if (copyout((caddr_t)&frame32, ua_fp, sizeof (frame32))) + goto bad; + + /* + * Build the signal context to be used by sigreturn. 
+ */ + bzero(&uctx32, sizeof(uctx32)); + + uctx32.uc_onstack = oonstack; + uctx32.uc_sigmask = mask; + uctx32.uc_stack.ss_sp = CAST_DOWN(char *, ua_fp); + uctx32.uc_stack.ss_size = stack_size; + + if (oonstack) + uctx32.uc_stack.ss_flags |= SS_ONSTACK; + uctx32.uc_link = 0; + + uctx32.uc_mcsize = sizeof(struct mcontext32); + + uctx32.uc_mcontext = CAST_DOWN(struct mcontext *, ua_mctxp); + + if (copyout((caddr_t)&uctx32, ua_uctxp, sizeof (uctx32))) + goto bad; + + if (copyout((caddr_t)&mctx.mctx32, ua_mctxp, sizeof (struct mcontext32))) + goto bad; + + sinfo64.pad[0] = tstate32->esp; + sinfo64.si_addr = tstate32->eip; + } + + switch (sig) { + case SIGCHLD: + sinfo64.si_pid = p->si_pid; + p->si_pid =0; + sinfo64.si_status = p->si_status; + p->si_status = 0; + sinfo64.si_uid = p->si_uid; + p->si_uid =0; + sinfo64.si_code = p->si_code; + p->si_code = 0; + if (sinfo64.si_code == CLD_EXITED) { + if (WIFEXITED(sinfo64.si_status)) + sinfo64.si_code = CLD_EXITED; + else if (WIFSIGNALED(sinfo64.si_status)) { + if (WCOREDUMP(sinfo64.si_status)) + sinfo64.si_code = CLD_DUMPED; + else + sinfo64.si_code = CLD_KILLED; + } + } + break; + case SIGILL: + switch (ut->uu_code) { + case EXC_I386_INVOP: + sinfo64.si_code = ILL_ILLOPC; + break; + case EXC_I386_GPFLT: + sinfo64.si_code = ILL_PRVOPC; + break; + default: + printf("unknown SIGILL code %d\n", ut->uu_code); + sinfo64.si_code = ILL_NOOP; + } + break; + case SIGFPE: +#define FP_IE 0 /* Invalid operation */ +#define FP_DE 1 /* Denormalized operand */ +#define FP_ZE 2 /* Zero divide */ +#define FP_OE 3 /* overflow */ +#define FP_UE 4 /* underflow */ +#define FP_PE 5 /* precision */ + if (ut->uu_subcode & (1 << FP_ZE)) { + sinfo64.si_code = FPE_FLTDIV; + } else if (ut->uu_subcode & (1 << FP_OE)) { + sinfo64.si_code = FPE_FLTOVF; + } else if (ut->uu_subcode & (1 << FP_UE)) { + sinfo64.si_code = FPE_FLTUND; + } else if (ut->uu_subcode & (1 << FP_PE)) { + sinfo64.si_code = FPE_FLTRES; + } else if (ut->uu_subcode & (1 << FP_IE)) { + 
sinfo64.si_code = FPE_FLTINV; + } else { + printf("unknown SIGFPE code %d, subcode %x\n", + ut->uu_code, ut->uu_subcode); + sinfo64.si_code = FPE_NOOP; + } + break; + case SIGBUS: + sinfo64.si_code = BUS_ADRERR; + sinfo64.si_addr = ua_cr2; + break; + case SIGTRAP: + sinfo64.si_code = TRAP_BRKPT; + break; + case SIGSEGV: + sinfo64.si_addr = ua_cr2; + + switch (ut->uu_code) { + case KERN_PROTECTION_FAILURE: + sinfo64.si_code = SEGV_ACCERR; + break; + case KERN_INVALID_ADDRESS: + sinfo64.si_code = SEGV_MAPERR; + break; + default: + printf("unknown SIGSEGV code %d\n", ut->uu_code); + sinfo64.si_code = FPE_NOOP; + } + break; + default: + break; } - if (copyout((caddr_t)&context, (user_addr_t)scp, sizeof (context))) - goto bad; + if (proc_is64bit(p)) { + if (copyout((caddr_t)&sinfo64, ua_sip, sizeof (sinfo64))) + goto bad; + + flavor = x86_THREAD_STATE64; + state_count = x86_THREAD_STATE64_COUNT; + state = (void *)&mctx.mctx64.ss; + } else { + x86_thread_state32_t *tstate32; + siginfo_t sinfo32; - saved_state->eip = (unsigned int)trampact; - saved_state->cs = USER_CS; + bzero((caddr_t)&sinfo32, sizeof(siginfo_t)); - saved_state->uesp = (unsigned int)fp; - saved_state->ss = USER_DS; + sinfo32.si_signo = sinfo64.si_signo; + sinfo32.si_code = sinfo64.si_code; + sinfo32.si_pid = sinfo64.si_pid; + sinfo32.si_uid = sinfo64.si_uid; + sinfo32.si_status = sinfo64.si_status; + sinfo32.si_addr = CAST_DOWN(void *, sinfo64.si_addr); + sinfo32.pad[0] = sinfo64.pad[0]; + + if (copyout((caddr_t)&sinfo32, ua_sip, sizeof (sinfo32))) + goto bad; + + tstate32 = &mctx.mctx32.ss; + tstate32->eip = CAST_DOWN(unsigned int, ps->ps_trampact[sig]); + tstate32->esp = CAST_DOWN(unsigned int, ua_fp); + + tstate32->eflags = get_eflags_exportmask(); + + tstate32->cs = USER_CS; + tstate32->ss = USER_DS; + tstate32->ds = USER_DS; + tstate32->es = USER_DS; + tstate32->fs = NULL_SEG; + tstate32->gs = USER_CTHREAD; + + flavor = x86_THREAD_STATE32; + state_count = x86_THREAD_STATE32_COUNT; + state = (void 
*)tstate32; + } + if (thread_setstatus(thread, flavor, (thread_state_t)state, state_count) != KERN_SUCCESS) + goto bad; + ml_fp_setvalid(FALSE); - saved_state->ds = USER_DS; - saved_state->es = USER_DS; - saved_state->fs = NULL_SEG; - saved_state->gs = USER_CTHREAD; return; bad: @@ -189,92 +503,102 @@ sendsig(p, catcher, sig, mask, code) * psl to gain improper priviledges or to cause * a machine fault. */ -/* ARGSUSED */ + int sigreturn( struct proc *p, struct sigreturn_args *uap, __unused int *retval) { - struct sigcontext context; - thread_t thread = current_thread(); - int error; - struct i386_saved_state* saved_state = (struct i386_saved_state*) - get_user_regs(thread); + union { + struct mcontext32 mctx32; + struct mcontext64 mctx64; + } mctx; + thread_t thread = current_thread(); struct uthread * ut; + int error; + int uthsigaltstack = 0; + int onstack = 0; + mach_msg_type_number_t ts_count; + unsigned int ts_flavor; + void * ts; + mach_msg_type_number_t fs_count; + unsigned int fs_flavor; + void * fs; - - if (saved_state == NULL) - return EINVAL; + ut = (struct uthread *)get_bsdthread_info(thread); + uthsigaltstack = p->p_lflag & P_LTHSIGSTACK; - if ((error = copyin(CAST_USER_ADDR_T(uap->sigcntxp), (void *)&context, - sizeof (context)))) - return(error); + if (proc_is64bit(p)) { + struct user_ucontext64 uctx64; - /* - * Validate segment selectors. - * Bad values would result in kernel exception at context switch - * back to user mode. If other state is invalid an exception will - * occur in user context. 
- */ - if (!valid_user_segment_selectors(context.sc_cs, - context.sc_ss, - context.sc_ds, - context.sc_es, - context.sc_fs, - context.sc_gs)) { - return EINVAL; - } + if ((error = copyin(uap->uctx, (void *)&uctx64, sizeof (uctx64)))) + return(error); - ut = (struct uthread *)get_bsdthread_info(thread); + if ((error = copyin(uctx64.uc_mcontext64, (void *)&mctx.mctx64, sizeof (struct mcontext64)))) + return(error); - if (context.sc_onstack & 01) - p->p_sigacts->ps_sigstk.ss_flags |= SA_ONSTACK; - else - p->p_sigacts->ps_sigstk.ss_flags &= ~SA_ONSTACK; + onstack = uctx64.uc_onstack & 01; + ut->uu_sigmask = uctx64.uc_sigmask & ~sigcantmask; - ut->uu_sigmask = context.sc_mask &~ sigcantmask; - if(ut->uu_siglist & ~ut->uu_sigmask) - signal_setast(thread); + ts_flavor = x86_THREAD_STATE64; + ts_count = x86_THREAD_STATE64_COUNT; + ts = (void *)&mctx.mctx64.ss; + + fs_flavor = x86_FLOAT_STATE64; + fs_count = x86_FLOAT_STATE64_COUNT; + fs = (void *)&mctx.mctx64.fs; + + } else { + struct ucontext uctx32; + + if ((error = copyin(uap->uctx, (void *)&uctx32, sizeof (uctx32)))) + return(error); + + if ((error = copyin(CAST_USER_ADDR_T(uctx32.uc_mcontext), (void *)&mctx.mctx32, sizeof (struct mcontext32)))) + return(error); - saved_state->eax = context.sc_eax; - saved_state->ebx = context.sc_ebx; - saved_state->ecx = context.sc_ecx; - saved_state->edx = context.sc_edx; - saved_state->edi = context.sc_edi; - saved_state->esi = context.sc_esi; - saved_state->ebp = context.sc_ebp; - saved_state->uesp = context.sc_esp; - saved_state->ss = context.sc_ss; - saved_state->efl = context.sc_eflags; - saved_state->efl &= ~EFL_USERCLR; - saved_state->efl |= EFL_USERSET; - saved_state->eip = context.sc_eip; - saved_state->cs = context.sc_cs; - - if (context.sc_eflags & EFL_VM) { - saved_state->ds = NULL_SEG; - saved_state->es = NULL_SEG; - saved_state->fs = NULL_SEG; - saved_state->gs = NULL_SEG; - saved_state->v86_segs.v86_ds = context.sc_ds; - saved_state->v86_segs.v86_es = context.sc_es; - 
saved_state->v86_segs.v86_fs = context.sc_fs; - saved_state->v86_segs.v86_gs = context.sc_gs; - - saved_state->efl |= EFL_VM; + onstack = uctx32.uc_onstack & 01; + ut->uu_sigmask = uctx32.uc_sigmask & ~sigcantmask; + + ts_flavor = x86_THREAD_STATE32; + ts_count = x86_THREAD_STATE32_COUNT; + ts = (void *)&mctx.mctx32.ss; + + fs_flavor = x86_FLOAT_STATE32; + fs_count = x86_FLOAT_STATE32_COUNT; + fs = (void *)&mctx.mctx32.fs; } - else { - saved_state->ds = context.sc_ds; - saved_state->es = context.sc_es; - saved_state->fs = context.sc_fs; - saved_state->gs = context.sc_gs; + if (onstack) { + if (uthsigaltstack != 0) + ut->uu_sigstk.ss_flags |= SA_ONSTACK; + else + p->p_sigacts->ps_sigstk.ss_flags |= SA_ONSTACK; + } else { + if (uthsigaltstack != 0) + ut->uu_sigstk.ss_flags &= ~SA_ONSTACK; + else + p->p_sigacts->ps_sigstk.ss_flags &= ~SA_ONSTACK; } + if (ut->uu_siglist & ~ut->uu_sigmask) + signal_setast(thread); + + /* + * thread_set_state() does all the needed checks for the passed in content + */ + if (thread_setstatus(thread, ts_flavor, ts, ts_count) != KERN_SUCCESS) + return(EINVAL); + + ml_fp_setvalid(TRUE); + + if (thread_setstatus(thread, fs_flavor, fs, fs_count) != KERN_SUCCESS) + return(EINVAL); return (EJUSTRETURN); } + /* * machine_exception() performs MD translation * of a mach exception to a unix signal and code. 
diff --git a/bsd/dev/ppc/kern_machdep.c b/bsd/dev/ppc/kern_machdep.c index 622e80e7b..bdf477f7f 100644 --- a/bsd/dev/ppc/kern_machdep.c +++ b/bsd/dev/ppc/kern_machdep.c @@ -35,8 +35,7 @@ #include #include #include - -int grade_binary(cpu_type_t exectype, cpu_subtype_t execsubtype); +#include /* * Routine: grade_binary() @@ -221,8 +220,6 @@ grade_binary(cpu_type_t exectype, cpu_subtype_t execsubtype) /* NOTREACHED */ } -extern vm_map_offset_t kvtophys64(vm_map_offset_t); - boolean_t kernacc( off_t start, @@ -236,7 +233,7 @@ kernacc( end = start + len; while (base < end) { - if(kvtophys64((vm_map_offset_t)base) == (vm_map_offset_t)0) + if(kvtophys((vm_offset_t)base) == NULL) return(FALSE); base += page_size; } diff --git a/bsd/dev/ppc/mem.c b/bsd/dev/ppc/mem.c index 4e7c8f8c3..54fe48e68 100644 --- a/bsd/dev/ppc/mem.c +++ b/bsd/dev/ppc/mem.c @@ -87,6 +87,7 @@ static caddr_t devzerobuf; extern pmap_t kernel_pmap; extern boolean_t kernacc(off_t, size_t ); +extern int setup_kmem; int mmread(dev_t dev, struct uio *uio); int mmrw(dev_t dev, struct uio *uio, enum uio_rw rw); @@ -112,9 +113,14 @@ mmwrite(dev, uio) } int -mmioctl(__unused dev_t dev, u_long cmd, __unused caddr_t data, +mmioctl(dev_t dev, u_long cmd, __unused caddr_t data, __unused int flag, __unused struct proc *p) { + int minnum = minor(dev); + + if ((setup_kmem == 0) && ((minnum == 0) || (minnum == 1))) + return(EINVAL); + switch (cmd) { case FIONBIO: case FIOASYNC: @@ -143,6 +149,7 @@ mmrw(dev, uio, rw) int error = 0; vm_offset_t where; + while (uio_resid(uio) > 0 && error == 0) { if (uio_iov_len(uio) == 0) { uio_next_iov(uio); @@ -155,6 +162,8 @@ mmrw(dev, uio, rw) /* minor device 0 is physical memory */ case 0: + if (setup_kmem == 0) + return(ENODEV); vll = trunc_page_64(uio->uio_offset); if(((vll >> 31) == 1) || vll >= ((dgWork.dgFlags & enaDiagDM) ? 
mem_actual : max_mem)) goto fault; @@ -190,6 +199,8 @@ mmrw(dev, uio, rw) /* minor device 1 is kernel memory */ case 1: + if (setup_kmem == 0) + return(ENODEV); /* Do some sanity checking */ if (((addr64_t)uio->uio_offset > vm_last_addr) || ((addr64_t)uio->uio_offset < VM_MIN_KERNEL_ADDRESS)) diff --git a/bsd/dev/ppc/munge.s b/bsd/dev/ppc/munge.s index 0f4e09acf..fdf7c7060 100644 --- a/bsd/dev/ppc/munge.s +++ b/bsd/dev/ppc/munge.s @@ -240,6 +240,33 @@ _munge_wwwl: blr + .align 5 + .globl _munge_wwwlww +_munge_wwwlww: + li r0,0 + lwz r5,0*8+4(r3) + lwz r6,1*8+4(r3) + lwz r7,2*8+4(r3) + lwz r8,3*8+4(r3) + lwz r9,4*8+4(r3) + lwz r10,5*8+4(r3) + lwz r11,6*8+4(r3) + + stw r0,0*8+0(r4) + stw r5,0*8+4(r4) + stw r0,1*8+0(r4) + stw r6,1*8+4(r4) + stw r0,2*8+0(r4) + stw r7,2*8+4(r4) + stw r8,3*8+0(r4) + stw r9,3*8+4(r4) + stw r0,4*8+0(r4) + stw r10,4*8+4(r4) + stw r0,5*8+0(r4) + stw r11,5*8+4(r4) + + blr + .align 5 .globl _munge_wwwwl // 4 'w's and an l _munge_wwwwl: diff --git a/bsd/dev/ppc/systemcalls.c b/bsd/dev/ppc/systemcalls.c index 4ec9ebdf5..6c38ac6a1 100644 --- a/bsd/dev/ppc/systemcalls.c +++ b/bsd/dev/ppc/systemcalls.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -124,8 +124,6 @@ unix_syscall(struct savearea *regs) kauth_cred_rele(old); } - uthread->uu_ar0 = (int *)regs; - callp = (code >= nsysent) ? &sysent[63] : &sysent[code]; if (callp->sy_narg != 0) { @@ -323,7 +321,6 @@ unix_syscall_return(int error) struct savearea *regs; unsigned short code; struct sysent *callp; - int funnel_type; unsigned int cancel_enable; thread_act = current_thread(); @@ -441,90 +438,9 @@ unix_syscall_return(int error) /* NOTREACHED */ } -/* - * Time of day and interval timer support. - * - * These routines provide the kernel entry points to get and set - * the time-of-day and per-process interval timers. 
Subroutines - * here provide support for adding and subtracting timeval structures - * and decrementing interval timers, optionally reloading the interval - * timers when they expire. - */ -/* NOTE THIS implementation is for ppc architectures only. - * It is infrequently called, since the commpage intercepts - * most calls in user mode. - * - * XXX Y2038 bug because of assumed return of 32 bit seconds value, and - * XXX first parameter to clock_gettimeofday() - */ -int -ppc_gettimeofday(__unused struct proc *p, - register struct ppc_gettimeofday_args *uap, - register_t *retval) -{ - int error = 0; - extern lck_spin_t * tz_slock; - - if (uap->tp) - clock_gettimeofday(&retval[0], &retval[1]); - - if (uap->tzp) { - struct timezone ltz; - - lck_spin_lock(tz_slock); - ltz = tz; - lck_spin_unlock(tz_slock); - error = copyout((caddr_t)<z, uap->tzp, sizeof (tz)); - } - - return (error); -} - #ifdef JOE_DEBUG joe_debug(char *p) { printf("%s\n", p); } #endif - - -/* - * WARNING - this is a temporary workaround for binary compatibility issues - * with anti-piracy software that relies on patching ptrace (3928003). - * This KPI will be removed in the system release after Tiger. 
- */ -uintptr_t temp_patch_ptrace(uintptr_t new_ptrace) -{ - struct sysent * callp; - sy_call_t * old_ptrace; - - if (new_ptrace == 0) - return(0); - - enter_funnel_section(kernel_flock); - callp = &sysent[26]; - old_ptrace = callp->sy_call; - - /* only allow one patcher of ptrace */ - if (old_ptrace == (sy_call_t *) ptrace) { - callp->sy_call = (sy_call_t *) new_ptrace; - } - else { - old_ptrace = NULL; - } - exit_funnel_section( ); - - return((uintptr_t)old_ptrace); -} - -void temp_unpatch_ptrace(void) -{ - struct sysent * callp; - - enter_funnel_section(kernel_flock); - callp = &sysent[26]; - callp->sy_call = (sy_call_t *) ptrace; - exit_funnel_section( ); - - return; -} diff --git a/bsd/dev/ppc/unix_signal.c b/bsd/dev/ppc/unix_signal.c index 75a700d51..83bab1f9d 100644 --- a/bsd/dev/ppc/unix_signal.c +++ b/bsd/dev/ppc/unix_signal.c @@ -196,8 +196,11 @@ sendsig(struct proc *p, user_addr_t catcher, int sig, int mask, __unused u_long int stack_size = 0; void * tstate; int flavor; - int ctx32 = 1; - + int ctx32 = 1; + int uthsigaltstack = 0; + int altstack = 0; + + th_act = current_thread(); ut = get_bsdthread_info(th_act); @@ -300,15 +303,31 @@ sendsig(struct proc *p, user_addr_t catcher, int sig, int mask, __unused u_long } trampact = ps->ps_trampact[sig]; - oonstack = ps->ps_sigstk.ss_flags & SA_ONSTACK; + uthsigaltstack = p->p_lflag & P_LTHSIGSTACK; + + if (uthsigaltstack != 0 ) { + oonstack = ut->uu_sigstk.ss_flags & SA_ONSTACK; + altstack = ut->uu_flag & UT_ALTSTACK; + } else { + oonstack = ps->ps_sigstk.ss_flags & SA_ONSTACK; + altstack = ps->ps_flags & SAS_ALTSTACK; + } + /* figure out where our new stack lives */ - if ((ps->ps_flags & SAS_ALTSTACK) && !oonstack && + if (altstack && !oonstack && (ps->ps_sigonstack & sigmask(sig))) { - sp = ps->ps_sigstk.ss_sp; - sp += ps->ps_sigstk.ss_size; - stack_size = ps->ps_sigstk.ss_size; - ps->ps_sigstk.ss_flags |= SA_ONSTACK; + if (uthsigaltstack != 0) { + sp = ut->uu_sigstk.ss_sp; + sp += ut->uu_sigstk.ss_size; + 
stack_size = ut->uu_sigstk.ss_size; + ut->uu_sigstk.ss_flags |= SA_ONSTACK; + } else { + sp = ps->ps_sigstk.ss_sp; + sp += ps->ps_sigstk.ss_size; + stack_size = ps->ps_sigstk.ss_size; + ps->ps_sigstk.ss_flags |= SA_ONSTACK; + } } else { if (ctx32 == 0) @@ -632,8 +651,9 @@ sigreturn(struct proc *p, struct sigreturn_args *uap, __unused int *retval) struct uthread * ut; int vec_used = 0; void *tsptr, *fptr, *vptr; - int infostyle = uap->infostyle; - + int infostyle = uap->infostyle; + int uthsigaltstack = 0; + th_act = current_thread(); ut = (struct uthread *)get_bsdthread_info(th_act); @@ -675,11 +695,21 @@ sigreturn(struct proc *p, struct sigreturn_args *uap, __unused int *retval) error = copyin(uctx.uc_mcontext64, mactx, uctx.uc_mcsize); if (error) return(error); - - if ((uctx.uc_onstack & 01)) + + uthsigaltstack = p->p_lflag & P_LTHSIGSTACK; + + + if (uctx.uc_onstack & 01) { + if (uthsigaltstack != 0) + ut->uu_sigstk.ss_flags |= SA_ONSTACK; + else p->p_sigacts->ps_sigstk.ss_flags |= SA_ONSTACK; - else - p->p_sigacts->ps_sigstk.ss_flags &= ~SA_ONSTACK; + } else { + if (uthsigaltstack != 0) + ut->uu_sigstk.ss_flags &= ~SA_ONSTACK; + else + p->p_sigacts->ps_sigstk.ss_flags &= ~SA_ONSTACK; + } ut->uu_sigmask = uctx.uc_sigmask & ~sigcantmask; if (ut->uu_siglist & ~ut->uu_sigmask) diff --git a/bsd/dev/random/randomdev.c b/bsd/dev/random/randomdev.c index 747656750..c321228ac 100644 --- a/bsd/dev/random/randomdev.c +++ b/bsd/dev/random/randomdev.c @@ -238,8 +238,8 @@ random_write (__unused dev_t dev, struct uio *uio, __unused int ioflag) /* put it in Yarrow */ if (prngInput(gPrngRef, (BYTE*) rdBuffer, - sizeof (rdBuffer), SYSTEM_SOURCE, - sizeof (rdBuffer) * 8) != 0) { + bytesToInput, SYSTEM_SOURCE, + bytesToInput * 8) != 0) { retCode = EIO; goto error_exit; } @@ -279,7 +279,7 @@ random_read(__unused dev_t dev, struct uio *uio, __unused int ioflag) int bytesToRead = min(uio_resid(uio), sizeof (wrBuffer)); /* get the data from Yarrow */ - if (prngOutput(gPrngRef, (BYTE *) 
wrBuffer, sizeof (wrBuffer)) != 0) { + if (prngOutput(gPrngRef, (BYTE *) wrBuffer, bytesToRead) != 0) { printf ("Couldn't read data from Yarrow.\n"); /* something's really weird */ diff --git a/bsd/dev/unix_startup.c b/bsd/dev/unix_startup.c index 33070c045..018ea1583 100644 --- a/bsd/dev/unix_startup.c +++ b/bsd/dev/unix_startup.c @@ -37,6 +37,8 @@ #include #include #include +#include +#include #include extern vm_map_t mb_map; @@ -47,20 +49,28 @@ extern u_long tcp_recvspace; void bsd_bufferinit(void); extern void md_prepare_for_shutdown(int, int, char *); +int bsd_mbuf_cluster_reserve(void); + /* * Declare these as initialized data so we can patch them. */ #ifdef NBUF -int nbuf = NBUF; +int max_nbuf_headers = NBUF; int niobuf = NBUF / 2; - +int nbuf_hashelements = NBUF; +int nbuf = NBUF; #else -int nbuf = 0; +int max_nbuf_headers = 0; int niobuf = 0; - +int nbuf_hashelements = 0; +int nbuf = 0; #endif +SYSCTL_INT (_kern, OID_AUTO, nbuf, CTLFLAG_RD, &nbuf, 0, ""); +SYSCTL_INT (_kern, OID_AUTO, maxnbuf, CTLFLAG_RW, &max_nbuf_headers, 0, ""); + +__private_extern__ int customnbuf = 0; int srv = 0; /* Flag indicates a server boot when set */ int ncl = 0; @@ -77,21 +87,30 @@ bsd_startupearly(void) vm_size_t size; kern_return_t ret; - if (nbuf == 0) - nbuf = atop(sane_size / 100); /* Get 1% of ram, but no more than we can map */ - if (nbuf > 8192) - nbuf = 8192; - if (nbuf < 256) - nbuf = 256; + /* clip the number of buf headers upto 16k */ + if (max_nbuf_headers == 0) + max_nbuf_headers = atop(sane_size / 50); /* Get 2% of ram, but no more than we can map */ + if ((customnbuf == 0) && (max_nbuf_headers > 16384)) + max_nbuf_headers = 16384; + if (max_nbuf_headers < 256) + max_nbuf_headers = 256; + + /* clip the number of hash elements to 200000 */ + if ( (customnbuf == 0 ) && nbuf_hashelements == 0) { + nbuf_hashelements = atop(sane_size / 50); + if (nbuf_hashelements > 200000) + nbuf_hashelements = 200000; + } else + nbuf_hashelements = max_nbuf_headers; if (niobuf == 
0) - niobuf = nbuf; + niobuf = max_nbuf_headers; if (niobuf > 4096) niobuf = 4096; if (niobuf < 128) niobuf = 128; - size = (nbuf + niobuf) * sizeof(struct buf); + size = (max_nbuf_headers + niobuf) * sizeof(struct buf); size = round_page(size); ret = kmem_suballoc(kernel_map, @@ -116,13 +135,11 @@ bsd_startupearly(void) buf = (struct buf *) firstaddr; bzero(buf, size); - if (sane_size > (64 * 1024 * 1024) || ncl) { + { int scale; - if ((nmbclusters = ncl) == 0) { - if ((nmbclusters = ((sane_size / 16)/MCLBYTES)) > 32768) - nmbclusters = 32768; - } + nmbclusters = bsd_mbuf_cluster_reserve() / MCLBYTES; + if ((scale = nmbclusters / NMBCLUSTERS) > 1) { tcp_sendspace *= scale; tcp_recvspace *= scale; @@ -133,6 +150,17 @@ bsd_startupearly(void) tcp_recvspace = 32 * 1024; } } + + /* + * Size vnodes based on memory + * Number vnodes is (memsize/64k) + 1024 + * This is the calculation that is used by launchd in tiger + * we are clipping the max based on 16G + * ie ((16*1024*1024*1024)/(64 *1024)) + 1024 = 263168; + */ + desiredvnodes = (sane_size/65536) + 1024; + if (desiredvnodes > 263168) + desiredvnodes = 263168; } void @@ -159,3 +187,24 @@ bsd_bufferinit(void) */ bufinit(); } + +/* + * this has been broken out into a separate routine that + * can be called from the x86 early vm initialization to + * determine how much lo memory to reserve on systems with + * DMA hardware that can't fully address all of the physical + * memory that is present. 
+ */ +int +bsd_mbuf_cluster_reserve(void) +{ + if (sane_size > (64 * 1024 * 1024) || ncl) { + + if ((nmbclusters = ncl) == 0) { + if ((nmbclusters = ((sane_size / 16)/MCLBYTES)) > 32768) + nmbclusters = 32768; + } + } + + return (nmbclusters * MCLBYTES); +} diff --git a/bsd/hfs/hfs.h b/bsd/hfs/hfs.h index 4e3e3370b..2d34f3f8a 100644 --- a/bsd/hfs/hfs.h +++ b/bsd/hfs/hfs.h @@ -248,6 +248,10 @@ typedef struct hfsmount { lck_mtx_t hfs_mutex; /* protects access to hfsmount data */ void *hfs_freezing_proc; /* who froze the fs */ lck_rw_t hfs_insync; /* protects sync/freeze interaction */ + + /* Resize variables: */ + u_int32_t hfs_resize_filesmoved; + u_int32_t hfs_resize_totalfiles; } hfsmount_t; typedef hfsmount_t ExtendedVCB; @@ -275,6 +279,7 @@ typedef hfsmount_t ExtendedVCB; #define HFS_FRAGMENTED_FREESPACE 0x100 #define HFS_NEED_JNL_RESET 0x200 #define HFS_HAS_SPARSE_DEVICE 0x400 +#define HFS_RESIZE_IN_PROGRESS 0x800 #define HFS_MOUNT_LOCK(hfsmp, metadata) \ @@ -491,6 +496,7 @@ extern void hfs_checkextendedsecurity(struct hfsmount *hfsmp); extern int hfs_extendfs(struct hfsmount *, u_int64_t, vfs_context_t); extern int hfs_truncatefs(struct hfsmount *, u_int64_t, vfs_context_t); +extern int hfs_resize_progress(struct hfsmount *, u_int32_t *); extern int hfs_isallocated(struct hfsmount *, u_long, u_long); diff --git a/bsd/hfs/hfs_btreeio.c b/bsd/hfs/hfs_btreeio.c index 503528553..3c19d78e2 100644 --- a/bsd/hfs/hfs_btreeio.c +++ b/bsd/hfs/hfs_btreeio.c @@ -289,6 +289,8 @@ OSStatus ReleaseBTreeBlock(FileReference vp, BlockDescPtr blockPtr, ReleaseBlock } +#define HFS_CLUMP_ADJ_LIMIT (200*1024*1024) + __private_extern__ OSStatus ExtendBTreeFile(FileReference vp, FSSize minEOF, FSSize maxEOF) { @@ -321,7 +323,13 @@ OSStatus ExtendBTreeFile(FileReference vp, FSSize minEOF, FSSize maxEOF) } vcb = VTOVCB(vp); - + + /* Take past growth into account when extending the catalog file. 
*/ + if ((VTOC(vp)->c_fileid == kHFSCatalogFileID) && + (bytesToAdd / vcb->blockSize) < filePtr->fcbExtents[0].blockCount) { + bytesToAdd = filePtr->fcbExtents[0].blockCount * (UInt64)vcb->blockSize; + bytesToAdd = MIN(bytesToAdd, HFS_CLUMP_ADJ_LIMIT); + } /* * The Extents B-tree can't have overflow extents. ExtendFileC will * return an error if an attempt is made to extend the Extents B-tree diff --git a/bsd/hfs/hfs_catalog.c b/bsd/hfs/hfs_catalog.c index bb4e69620..0a3f112ce 100644 --- a/bsd/hfs/hfs_catalog.c +++ b/bsd/hfs/hfs_catalog.c @@ -653,6 +653,9 @@ cat_lookupbykey(struct hfsmount *hfsmp, CatalogKey *keyp, u_long hint, int wantr bcopy(&recp->hfsPlusFile.resourceFork.extents[0], &forkp->cf_extents[0], sizeof(HFSPlusExtentRecord)); } else { + int i; + u_int32_t validblks; + /* Convert the data fork. */ forkp->cf_size = recp->hfsPlusFile.dataFork.logicalSize; forkp->cf_blocks = recp->hfsPlusFile.dataFork.totalBlocks; @@ -667,6 +670,36 @@ cat_lookupbykey(struct hfsmount *hfsmp, CatalogKey *keyp, u_long hint, int wantr forkp->cf_vblocks = 0; bcopy(&recp->hfsPlusFile.dataFork.extents[0], &forkp->cf_extents[0], sizeof(HFSPlusExtentRecord)); + + /* Validate the fork's resident extents. */ + validblks = 0; + for (i = 0; i < kHFSPlusExtentDensity; ++i) { + if (forkp->cf_extents[i].startBlock + forkp->cf_extents[i].blockCount >= hfsmp->totalBlocks) { + /* Suppress any bad extents so a remove can succeed. */ + forkp->cf_extents[i].startBlock = 0; + forkp->cf_extents[i].blockCount = 0; + /* Disable writes */ + if (attrp != NULL) { + attrp->ca_mode &= S_IFMT | S_IRUSR | S_IRGRP | S_IROTH; + } + } else { + validblks += forkp->cf_extents[i].blockCount; + } + } + /* Adjust for any missing blocks. 
*/ + if ((validblks < forkp->cf_blocks) && (forkp->cf_extents[7].blockCount == 0)) { + u_int64_t psize; + + forkp->cf_blocks = validblks; + if (attrp != NULL) { + attrp->ca_blocks = validblks + recp->hfsPlusFile.resourceFork.totalBlocks; + } + psize = (u_int64_t)validblks * (u_int64_t)hfsmp->blockSize; + if (psize < forkp->cf_size) { + forkp->cf_size = psize; + } + + } } } if (descp != NULL) { diff --git a/bsd/hfs/hfs_chash.c b/bsd/hfs/hfs_chash.c index a317afe81..c93b1f1a4 100644 --- a/bsd/hfs/hfs_chash.c +++ b/bsd/hfs/hfs_chash.c @@ -96,11 +96,8 @@ hfs_chashinit() cnodehashtbl = hashinit(desiredvnodes, M_HFSMNT, &cnodehash); chash_lck_grp_attr= lck_grp_attr_alloc_init(); - lck_grp_attr_setstat(chash_lck_grp_attr); chash_lck_grp = lck_grp_alloc_init("cnode_hash", chash_lck_grp_attr); - chash_lck_attr = lck_attr_alloc_init(); - //lck_attr_setdebug(chash_lck_attr); lck_mtx_init(&hfs_chash_mutex, chash_lck_grp, chash_lck_attr); } diff --git a/bsd/hfs/hfs_cnode.c b/bsd/hfs/hfs_cnode.c index 132b17cc2..e7d86d973 100644 --- a/bsd/hfs/hfs_cnode.c +++ b/bsd/hfs/hfs_cnode.c @@ -995,7 +995,8 @@ void hfs_unlock(struct cnode *cp) { vnode_t rvp = NULLVP; - vnode_t dvp = NULLVP; + vnode_t vp = NULLVP; + u_int32_t c_flag; /* System files need to keep track of owner */ if ((cp->c_fileid < kHFSFirstUserCatalogNodeID) && @@ -1012,21 +1013,31 @@ hfs_unlock(struct cnode *cp) } } } - if (cp->c_flag & C_NEED_DVNODE_PUT) - dvp = cp->c_vp; - - if (cp->c_flag & C_NEED_RVNODE_PUT) - rvp = cp->c_rsrc_vp; - - cp->c_flag &= ~(C_NEED_DVNODE_PUT | C_NEED_RVNODE_PUT); + c_flag = cp->c_flag; + cp->c_flag &= ~(C_NEED_DVNODE_PUT | C_NEED_RVNODE_PUT | C_NEED_DATA_SETSIZE | C_NEED_RSRC_SETSIZE); + if (c_flag & (C_NEED_DVNODE_PUT | C_NEED_DATA_SETSIZE)) { + vp = cp->c_vp; + } + if (c_flag & (C_NEED_RVNODE_PUT | C_NEED_RSRC_SETSIZE)) { + rvp = cp->c_rsrc_vp; + } - cp-> c_lockowner = NULL; + cp->c_lockowner = NULL; lck_rw_done(&cp->c_rwlock); - if (dvp) - vnode_put(dvp); - if (rvp) - vnode_put(rvp); + 
/* Perform any vnode post processing after cnode lock is dropped. */ + if (vp) { + if (c_flag & C_NEED_DATA_SETSIZE) + ubc_setsize(vp, 0); + if (c_flag & C_NEED_DVNODE_PUT) + vnode_put(vp); + } + if (rvp) { + if (c_flag & C_NEED_RSRC_SETSIZE) + ubc_setsize(rvp, 0); + if (c_flag & C_NEED_RVNODE_PUT) + vnode_put(rvp); + } } /* diff --git a/bsd/hfs/hfs_cnode.h b/bsd/hfs/hfs_cnode.h index 7f6fa30e1..c819e792e 100644 --- a/bsd/hfs/hfs_cnode.h +++ b/bsd/hfs/hfs_cnode.h @@ -157,6 +157,9 @@ typedef struct cnode cnode_t; #define C_FORCEUPDATE 0x00100 /* force the catalog entry update */ #define C_HASXATTRS 0x00200 /* cnode has extended attributes */ +#define C_NEED_DATA_SETSIZE 0x01000 /* Do a ubc_setsize(0) on c_vp after the unlock */ +#define C_NEED_RSRC_SETSIZE 0x02000 /* Do a ubc_setsize(0) on c_rsrc_vp after the unlock */ + #define ZFTIMELIMIT (5 * 60) diff --git a/bsd/hfs/hfs_encodings.c b/bsd/hfs/hfs_encodings.c index 94029ef73..e2b13ca1c 100644 --- a/bsd/hfs/hfs_encodings.c +++ b/bsd/hfs/hfs_encodings.c @@ -69,11 +69,8 @@ hfs_converterinit(void) SLIST_INIT(&hfs_encoding_list); encodinglst_lck_grp_attr= lck_grp_attr_alloc_init(); - lck_grp_attr_setstat(encodinglst_lck_grp_attr); encodinglst_lck_grp = lck_grp_alloc_init("cnode_hash", encodinglst_lck_grp_attr); - encodinglst_lck_attr = lck_attr_alloc_init(); - //lck_attr_setdebug(encodinglst_lck_attr); lck_mtx_init(&encodinglst_mutex, encodinglst_lck_grp, encodinglst_lck_attr); diff --git a/bsd/hfs/hfs_endian.c b/bsd/hfs/hfs_endian.c index 304f27e83..b9837f669 100644 --- a/bsd/hfs/hfs_endian.c +++ b/bsd/hfs/hfs_endian.c @@ -27,8 +27,6 @@ * volume format. 
*/ -#include - #include "hfs_endian.h" #include "hfs_dbg.h" #include "hfscommon/headers/BTreesPrivate.h" diff --git a/bsd/hfs/hfs_endian.h b/bsd/hfs/hfs_endian.h index 330839d29..784299705 100644 --- a/bsd/hfs/hfs_endian.h +++ b/bsd/hfs/hfs_endian.h @@ -34,17 +34,16 @@ */ #include "hfs.h" #include "hfscommon/headers/BTreesInternal.h" -#include +#include /*********************/ /* BIG ENDIAN Macros */ /*********************/ -#if BYTE_ORDER == BIG_ENDIAN +#define SWAP_BE16(__a) OSSwapBigToHostInt16 (__a) +#define SWAP_BE32(__a) OSSwapBigToHostInt32 (__a) +#define SWAP_BE64(__a) OSSwapBigToHostInt64 (__a) - /* HFS is always big endian, make swaps into no-ops */ - #define SWAP_BE16(__a) (__a) - #define SWAP_BE32(__a) (__a) - #define SWAP_BE64(__a) (__a) +#if BYTE_ORDER == BIG_ENDIAN /* HFS is always big endian, no swapping needed */ #define SWAP_HFS_PLUS_FORK_DATA(__a) @@ -54,11 +53,6 @@ /************************/ #elif BYTE_ORDER == LITTLE_ENDIAN - /* HFS is always big endian, make swaps actually swap */ - #define SWAP_BE16(__a) NXSwapBigShortToHost (__a) - #define SWAP_BE32(__a) NXSwapBigLongToHost (__a) - #define SWAP_BE64(__a) NXSwapBigLongLongToHost (__a) - #define SWAP_HFS_PLUS_FORK_DATA(__a) hfs_swap_HFSPlusForkData ((__a)) #else diff --git a/bsd/hfs/hfs_fsctl.h b/bsd/hfs/hfs_fsctl.h index 573b0c9e0..440cabc55 100644 --- a/bsd/hfs/hfs_fsctl.h +++ b/bsd/hfs/hfs_fsctl.h @@ -41,6 +41,9 @@ struct hfs_backingstoreinfo { /* HFS FS CONTROL COMMANDS */ +#define HFSIOC_RESIZE_PROGRESS _IOR('h', 1, u_int32_t) +#define HFS_RESIZE_PROGRESS IOCBASECMD(HFSIOC_RESIZE_PROGRESS) + #define HFSIOC_RESIZE_VOLUME _IOW('h', 2, u_int64_t) #define HFS_RESIZE_VOLUME IOCBASECMD(HFSIOC_RESIZE_VOLUME) diff --git a/bsd/hfs/hfs_hotfiles.c b/bsd/hfs/hfs_hotfiles.c index 19a226846..e4d57acb7 100644 --- a/bsd/hfs/hfs_hotfiles.c +++ b/bsd/hfs/hfs_hotfiles.c @@ -288,13 +288,10 @@ hfs_recording_stop(struct hfsmount *hfsmp) if (hfsmp->hfc_stage != HFC_RECORDING) return (EPERM); - 
hotfiles_collect(hfsmp); - - if (hfsmp->hfc_stage != HFC_RECORDING) - return (0); - hfsmp->hfc_stage = HFC_BUSY; + hotfiles_collect(hfsmp); + /* * Convert hot file data into a simple file id list.... * @@ -759,6 +756,7 @@ hfs_addhotfile_internal(struct vnode *vp) if ((ffp->ff_bytesread == 0) || (ffp->ff_blocks == 0) || + (ffp->ff_size == 0) || (ffp->ff_blocks > hotdata->maxblocks) || (cp->c_flag & (C_DELETED | C_NOEXISTS)) || (cp->c_flags & UF_NODUMP) || @@ -822,7 +820,7 @@ hfs_removehotfile(struct vnode *vp) cp = VTOC(vp); if ((ffp->ff_bytesread == 0) || (ffp->ff_blocks == 0) || - (cp->c_atime < hfsmp->hfc_timebase)) { + (ffp->ff_size == 0) || (cp->c_atime < hfsmp->hfc_timebase)) { return (0); } diff --git a/bsd/hfs/hfs_readwrite.c b/bsd/hfs/hfs_readwrite.c index 7fc125ffa..de9d28f7b 100644 --- a/bsd/hfs/hfs_readwrite.c +++ b/bsd/hfs/hfs_readwrite.c @@ -852,6 +852,18 @@ hfs_vnop_ioctl( struct vnop_ioctl_args /* { switch (ap->a_command) { + case HFS_RESIZE_PROGRESS: { + + vfsp = vfs_statfs(HFSTOVFS(hfsmp)); + if (suser(cred, NULL) && + kauth_cred_getuid(cred) != vfsp->f_owner) { + return (EACCES); /* must be owner of file system */ + } + if (!vnode_isvroot(vp)) { + return (EINVAL); + } + return hfs_resize_progress(hfsmp, (u_int32_t *)ap->a_data); + } case HFS_RESIZE_VOLUME: { u_int64_t newsize; u_int64_t cursize; @@ -2549,7 +2561,8 @@ hfs_vnop_bwrite(struct vnop_bwrite_args *ap) /* Trap B-Tree writes */ if ((VTOC(vp)->c_fileid == kHFSExtentsFileID) || (VTOC(vp)->c_fileid == kHFSCatalogFileID) || - (VTOC(vp)->c_fileid == kHFSAttributesFileID)) { + (VTOC(vp)->c_fileid == kHFSAttributesFileID) || + (vp == VTOHFS(vp)->hfc_filevp)) { /* * Swap and validate the node if it is in native byte order. @@ -2818,10 +2831,10 @@ hfs_relocate(struct vnode *vp, u_int32_t blockHint, kauth_cred_t cred, lockflags = 0; } - // See comment up above about calls to hfs_fsync() - // - //if (retval == 0) - // retval = hfs_fsync(vp, MNT_WAIT, 0, p); + /* Push cnode's new extent data to disk. 
*/ + if (retval == 0) { + (void) hfs_update(vp, MNT_WAIT); + } if (hfsmp->jnl) { if (cp->c_cnid < kHFSFirstUserCatalogNodeID) @@ -2915,7 +2928,7 @@ hfs_clonefile(struct vnode *vp, int blkstart, int blkcnt, int blksize) filesize = VTOF(vp)->ff_blocks * blksize; /* virtual file size */ writebase = blkstart * blksize; copysize = blkcnt * blksize; - iosize = bufsize = MIN(copysize, 4096 * 16); + iosize = bufsize = MIN(copysize, 128 * 1024); offset = 0; if (kmem_alloc(kernel_map, (vm_offset_t *)&bufp, bufsize)) { diff --git a/bsd/hfs/hfs_vfsops.c b/bsd/hfs/hfs_vfsops.c index ba4a5983d..a1053d3c4 100644 --- a/bsd/hfs/hfs_vfsops.c +++ b/bsd/hfs/hfs_vfsops.c @@ -140,7 +140,9 @@ static int hfs_unmount(struct mount *mp, int mntflags, vfs_context_t context); static int hfs_vfs_vget(struct mount *mp, ino64_t ino, struct vnode **vpp, vfs_context_t context); static int hfs_vptofh(struct vnode *vp, int *fhlenp, unsigned char *fhp, vfs_context_t context); -static int hfs_reclaimspace(struct hfsmount *hfsmp, u_long startblk); +static int hfs_reclaimspace(struct hfsmount *hfsmp, u_long startblk, u_long reclaimblks); +static int hfs_overlapped_overflow_extents(struct hfsmount *hfsmp, u_int32_t startblk, + u_int32_t catblks, u_int32_t fileID, int rsrcfork); /* @@ -1561,9 +1563,9 @@ hfs_sync_metadata(void *arg) priIDSector = (daddr64_t)((vcb->hfsPlusIOPosOffset / sectorsize) + HFS_PRI_SECTOR(sectorsize)); retval = (int)buf_meta_bread(hfsmp->hfs_devvp, priIDSector, sectorsize, NOCRED, &bp); - if (retval != 0) { - panic("hfs: sync_metadata: can't read super-block?! (retval 0x%x, priIDSector)\n", - retval, priIDSector); + if ((retval != 0) && (retval != ENXIO)) { + printf("hfs_sync_metadata: can't read volume header at %d! 
(retval 0x%x)\n", + priIDSector, retval); } if (retval == 0 && ((buf_flags(bp) & (B_DELWRI | B_LOCKED)) == B_DELWRI)) { @@ -1765,7 +1767,7 @@ hfs_fhtovp(struct mount *mp, int fhlen, unsigned char *fhp, struct vnode **vpp, if (fhlen < sizeof(struct hfsfid)) return (EINVAL); - result = hfs_vget(VFSTOHFS(mp), hfsfhp->hfsfid_cnid, &nvp, 0); + result = hfs_vget(VFSTOHFS(mp), ntohl(hfsfhp->hfsfid_cnid), &nvp, 0); if (result) { if (result == ENOENT) result = ESTALE; @@ -1783,7 +1785,7 @@ hfs_fhtovp(struct mount *mp, int fhlen, unsigned char *fhp, struct vnode **vpp, * error prone. Future, would be change the "wrap bit" to a unique * wrap number and use that for generation number. For now do this. */ - if ((hfsfhp->hfsfid_gen < VTOC(nvp)->c_itime)) { + if ((ntohl(hfsfhp->hfsfid_gen) < VTOC(nvp)->c_itime)) { hfs_unlock(VTOC(nvp)); vnode_put(nvp); return (ESTALE); @@ -1813,8 +1815,8 @@ hfs_vptofh(struct vnode *vp, int *fhlenp, unsigned char *fhp, vfs_context_t cont cp = VTOC(vp); hfsfhp = (struct hfsfid *)fhp; - hfsfhp->hfsfid_cnid = cp->c_fileid; - hfsfhp->hfsfid_gen = cp->c_itime; + hfsfhp->hfsfid_cnid = htonl(cp->c_fileid); + hfsfhp->hfsfid_gen = htonl(cp->c_itime); *fhlenp = sizeof(struct hfsfid); return (0); @@ -1845,10 +1847,7 @@ hfs_init(__unused struct vfsconf *vfsp) hfs_group_attr = lck_grp_attr_alloc_init(); hfs_mutex_group = lck_grp_alloc_init("hfs-mutex", hfs_group_attr); hfs_rwlock_group = lck_grp_alloc_init("hfs-rwlock", hfs_group_attr); - - /* Turn on lock debugging */ - //lck_attr_setdebug(hfs_lock_attr); - + return (0); } @@ -2021,6 +2020,16 @@ hfs_sysctl(int *name, __unused u_int namelen, user_addr_t oldp, size_t *oldlenp, hfs_global_exclusive_lock_acquire(hfsmp); + /* + * Flush all dirty metadata buffers. 
+ */ + buf_flushdirtyblks(hfsmp->hfs_devvp, MNT_WAIT, 0, "hfs_sysctl"); + buf_flushdirtyblks(hfsmp->hfs_extents_vp, MNT_WAIT, 0, "hfs_sysctl"); + buf_flushdirtyblks(hfsmp->hfs_catalog_vp, MNT_WAIT, 0, "hfs_sysctl"); + buf_flushdirtyblks(hfsmp->hfs_allocation_vp, MNT_WAIT, 0, "hfs_sysctl"); + if (hfsmp->hfs_attribute_vp) + buf_flushdirtyblks(hfsmp->hfs_attribute_vp, MNT_WAIT, 0, "hfs_sysctl"); + HFSTOVCB(hfsmp)->vcbJinfoBlock = name[1]; HFSTOVCB(hfsmp)->vcbAtrb |= kHFSVolumeJournaledMask; hfsmp->jvp = jvp; @@ -2221,11 +2230,6 @@ hfs_vget(struct hfsmount *hfsmp, cnid_t cnid, struct vnode **vpp, int skiplock) (bcmp(cndesc.cd_nameptr, HFS_INODE_PREFIX, HFS_INODE_PREFIX_LEN) == 0)) { linkref = strtoul((const char*)&cndesc.cd_nameptr[HFS_INODE_PREFIX_LEN], NULL, 10); cnattr.ca_rdev = linkref; - - // patch up the parentcnid - if (cnattr.ca_attrblks != 0) { - cndesc.cd_parentcnid = cnattr.ca_attrblks; - } } } @@ -3012,7 +3016,6 @@ __private_extern__ int hfs_truncatefs(struct hfsmount *hfsmp, u_int64_t newsize, __unused vfs_context_t context) { - struct vnode* rvp = NULL; struct buf *bp = NULL; u_int64_t oldsize; u_int32_t newblkcnt; @@ -3021,20 +3024,22 @@ hfs_truncatefs(struct hfsmount *hfsmp, u_int64_t newsize, __unused vfs_context_t int transaction_begun = 0; int error; - /* - * Grab the root vnode to serialize with another hfs_truncatefs call. - */ - error = hfs_vget(hfsmp, kHFSRootFolderID, &rvp, 0); - if (error) { - return (error); + + lck_mtx_lock(&hfsmp->hfs_mutex); + if (hfsmp->hfs_flags & HFS_RESIZE_IN_PROGRESS) { + lck_mtx_unlock(&hfsmp->hfs_mutex); + return (EALREADY); } + hfsmp->hfs_flags |= HFS_RESIZE_IN_PROGRESS; + hfsmp->hfs_resize_filesmoved = 0; + hfsmp->hfs_resize_totalfiles = 0; + lck_mtx_unlock(&hfsmp->hfs_mutex); + /* - * - HFS Plus file systems only. - * - Journaling must be enabled. + * - Journaled HFS Plus volumes only. * - No embedded volumes. 
*/ - if ((hfsmp->hfs_flags & HFS_STANDARD) || - (hfsmp->jnl == NULL) || + if ((hfsmp->jnl == NULL) || (hfsmp->hfsPlusIOPosOffset != 0)) { error = EPERM; goto out; @@ -3051,13 +3056,12 @@ hfs_truncatefs(struct hfsmount *hfsmp, u_int64_t newsize, __unused vfs_context_t goto out; } /* Make sure there's enough space to work with. */ - if (reclaimblks > (hfsmp->freeBlocks / 4)) { + if (reclaimblks >= hfs_freeblks(hfsmp, 1)) { error = ENOSPC; goto out; } - - printf("hfs_truncatefs: shrinking %s by %d blocks out of %d\n", - hfsmp->vcbVN, reclaimblks, hfsmp->totalBlocks); + /* Start with a clean journal. */ + journal_flush(hfsmp->jnl); if (hfs_start_transaction(hfsmp) != 0) { error = EINVAL; @@ -3077,7 +3081,7 @@ hfs_truncatefs(struct hfsmount *hfsmp, u_int64_t newsize, __unused vfs_context_t transaction_begun = 0; /* Attempt to reclaim some space. */ - if (hfs_reclaimspace(hfsmp, newblkcnt) != 0) { + if (hfs_reclaimspace(hfsmp, newblkcnt, reclaimblks) != 0) { printf("hfs_truncatefs: couldn't reclaim space on %s\n", hfsmp->vcbVN); error = ENOSPC; goto out; @@ -3091,7 +3095,7 @@ hfs_truncatefs(struct hfsmount *hfsmp, u_int64_t newsize, __unused vfs_context_t /* Check if we're clear now. */ if (hfs_isallocated(hfsmp, newblkcnt, reclaimblks - 1)) { printf("hfs_truncatefs: didn't reclaim enough space on %s\n", hfsmp->vcbVN); - error = ENOSPC; + error = EAGAIN; /* tell client to try again */ goto out; } } @@ -3120,21 +3124,26 @@ hfs_truncatefs(struct hfsmount *hfsmp, u_int64_t newsize, __unused vfs_context_t /* * Invalidate the existing alternate volume header. + * + * Don't do this as a transaction (don't call journal_modify_block) + * since this block will be outside of the truncated file system! 
*/ if (hfsmp->hfs_alt_id_sector) { if (buf_meta_bread(hfsmp->hfs_devvp, hfsmp->hfs_alt_id_sector, hfsmp->hfs_phys_block_size, NOCRED, &bp) == 0) { - journal_modify_block_start(hfsmp->jnl, bp); bzero((void*)((char *)buf_dataptr(bp) + HFS_ALT_OFFSET(hfsmp->hfs_phys_block_size)), kMDBSize); - - journal_modify_block_end(hfsmp->jnl, bp); + (void) VNOP_BWRITE(bp); } else if (bp) { buf_brelse(bp); } bp = NULL; } + /* Log successful shrinking. */ + printf("hfs_truncatefs: shrank \"%s\" to %d blocks (was %d blocks)\n", + hfsmp->vcbVN, newblkcnt, hfsmp->totalBlocks); + /* * Adjust file system variables and flush them to disk. */ @@ -3152,54 +3161,90 @@ hfs_truncatefs(struct hfsmount *hfsmp, u_int64_t newsize, __unused vfs_context_t } if (transaction_begun) { hfs_end_transaction(hfsmp); + journal_flush(hfsmp->jnl); } - if (rvp) { - hfs_unlock(VTOC(rvp)); - vnode_put(rvp); - } + + lck_mtx_lock(&hfsmp->hfs_mutex); + hfsmp->hfs_flags &= ~HFS_RESIZE_IN_PROGRESS; + lck_mtx_unlock(&hfsmp->hfs_mutex); + return (error); } + /* * Reclaim space at the end of a file system. */ static int -hfs_reclaimspace(struct hfsmount *hfsmp, u_long startblk) +hfs_reclaimspace(struct hfsmount *hfsmp, u_long startblk, u_long reclaimblks) { struct vnode *vp = NULL; FCB *fcb; struct BTreeIterator * iterator = NULL; struct FSBufferDescriptor btdata; struct HFSPlusCatalogFile filerec; + struct filefork *fp; u_int32_t saved_next_allocation; cnid_t * cnidbufp; size_t cnidbufsize; - int filecnt; + int filecnt = 0; int maxfilecnt; u_long block; + u_long datablks; + u_long rsrcblks; + u_long blkstomove = 0; int lockflags; int i; int error; + int lastprogress = 0; - /* - * Check if Attributes file overlaps. - */ + + /* Check if Attributes file overlaps reclaim area. 
*/ if (hfsmp->hfs_attribute_vp) { - struct filefork *fp; - fp = VTOF(hfsmp->hfs_attribute_vp); + datablks = 0; for (i = 0; i < kHFSPlusExtentDensity; ++i) { - block = fp->ff_extents[i].startBlock + - fp->ff_extents[i].blockCount; + if (fp->ff_extents[i].blockCount == 0) { + break; + } + datablks += fp->ff_extents[i].blockCount; + block = fp->ff_extents[i].startBlock + fp->ff_extents[i].blockCount; if (block >= startblk) { printf("hfs_reclaimspace: Attributes file can't move\n"); return (EPERM); } } + if ((i == kHFSPlusExtentDensity) && (fp->ff_blocks > datablks)) { + if (hfs_overlapped_overflow_extents(hfsmp, startblk, datablks, kHFSAttributesFileID, 0)) { + printf("hfs_reclaimspace: Attributes file can't move\n"); + return (EPERM); + } + } + } + /* Check if Catalog file overlaps reclaim area. */ + fp = VTOF(hfsmp->hfs_catalog_vp); + datablks = 0; + for (i = 0; i < kHFSPlusExtentDensity; ++i) { + if (fp->ff_extents[i].blockCount == 0) { + break; + } + datablks += fp->ff_extents[i].blockCount; + block = fp->ff_extents[i].startBlock + fp->ff_extents[i].blockCount; + if (block >= startblk) { + printf("hfs_reclaimspace: Catalog file can't move\n"); + return (EPERM); + } + } + if ((i == kHFSPlusExtentDensity) && (fp->ff_blocks > datablks)) { + if (hfs_overlapped_overflow_extents(hfsmp, startblk, datablks, kHFSCatalogFileID, 0)) { + printf("hfs_reclaimspace: Catalog file can't move\n"); + return (EPERM); + } } - /* For now we'll move a maximum of 16,384 files. */ - maxfilecnt = MIN(hfsmp->hfs_filecount, 16384); + /* For now move a maximum of 250,000 files. 
*/ + maxfilecnt = MIN(hfsmp->hfs_filecount, 250000); + maxfilecnt = MIN((u_long)maxfilecnt, reclaimblks); cnidbufsize = maxfilecnt * sizeof(cnid_t); if (kmem_alloc(kernel_map, (vm_offset_t *)&cnidbufp, cnidbufsize)) { return (ENOMEM); @@ -3219,14 +3264,13 @@ hfs_reclaimspace(struct hfsmount *hfsmp, u_long startblk) btdata.itemSize = sizeof(filerec); btdata.itemCount = 1; - /* Keep the Catalog file locked during iteration. */ - lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK); + /* Keep the Catalog and extents files locked during iteration. */ + lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG | SFL_EXTENTS, HFS_SHARED_LOCK); + error = BTIterateRecord(fcb, kBTreeFirstRecord, iterator, NULL, NULL); if (error) { - hfs_systemfile_unlock(hfsmp, lockflags); - goto out; + goto end_iteration; } - /* * Iterate over all the catalog records looking for files * that overlap into the space we're trying to free up. @@ -3234,41 +3278,99 @@ hfs_reclaimspace(struct hfsmount *hfsmp, u_long startblk) for (filecnt = 0; filecnt < maxfilecnt; ) { error = BTIterateRecord(fcb, kBTreeNextRecord, iterator, &btdata, NULL); if (error) { - if (error == btNotFound) - error = 0; + if (error == fsBTRecordNotFoundErr || error == fsBTEndOfIterationErr) { + error = 0; + } break; } - if (filerec.recordType != kHFSPlusFileRecord || - filerec.fileID == hfsmp->hfs_jnlfileid) + if (filerec.recordType != kHFSPlusFileRecord) { continue; + } + datablks = rsrcblks = 0; /* * Check if either fork overlaps target space. 
*/ for (i = 0; i < kHFSPlusExtentDensity; ++i) { - block = filerec.dataFork.extents[i].startBlock + - filerec.dataFork.extents[i].blockCount; - if (block >= startblk) { - if (filerec.fileID == hfsmp->hfs_jnlfileid) { - printf("hfs_reclaimspace: cannot move active journal\n"); - error = EPERM; + if (filerec.dataFork.extents[i].blockCount != 0) { + datablks += filerec.dataFork.extents[i].blockCount; + block = filerec.dataFork.extents[i].startBlock + + filerec.dataFork.extents[i].blockCount; + if (block >= startblk) { + if ((filerec.fileID == hfsmp->hfs_jnlfileid) || + (filerec.fileID == hfsmp->hfs_jnlinfoblkid)) { + printf("hfs_reclaimspace: cannot move active journal\n"); + error = EPERM; + goto end_iteration; + } + cnidbufp[filecnt++] = filerec.fileID; + blkstomove += filerec.dataFork.totalBlocks; break; } - cnidbufp[filecnt++] = filerec.fileID; - break; } - block = filerec.resourceFork.extents[i].startBlock + - filerec.resourceFork.extents[i].blockCount; - if (block >= startblk) { - cnidbufp[filecnt++] = filerec.fileID; - break; + if (filerec.resourceFork.extents[i].blockCount != 0) { + rsrcblks += filerec.resourceFork.extents[i].blockCount; + block = filerec.resourceFork.extents[i].startBlock + + filerec.resourceFork.extents[i].blockCount; + if (block >= startblk) { + cnidbufp[filecnt++] = filerec.fileID; + blkstomove += filerec.resourceFork.totalBlocks; + break; + } + } + } + /* + * Check for any overflow extents that overlap. 
+ */ + if (i == kHFSPlusExtentDensity) { + if (filerec.dataFork.totalBlocks > datablks) { + if (hfs_overlapped_overflow_extents(hfsmp, startblk, datablks, filerec.fileID, 0)) { + cnidbufp[filecnt++] = filerec.fileID; + blkstomove += filerec.dataFork.totalBlocks; + } + } else if (filerec.resourceFork.totalBlocks > rsrcblks) { + if (hfs_overlapped_overflow_extents(hfsmp, startblk, rsrcblks, filerec.fileID, 1)) { + cnidbufp[filecnt++] = filerec.fileID; + blkstomove += filerec.resourceFork.totalBlocks; + } } } } + +end_iteration: + if (filecnt == 0) { + error = ENOSPC; + } /* All done with catalog. */ hfs_systemfile_unlock(hfsmp, lockflags); if (error) goto out; + /* + * Double check space requirements to make sure + * there is enough space to relocate any files + * that reside in the reclaim area. + * + * Blocks To Move -------------- + * | | | + * V V V + * ------------------------------------------------------------------------ + * | | / /// // | + * | | / /// // | + * | | / /// // | + * ------------------------------------------------------------------------ + * + * <------------------- New Total Blocks ------------------><-- Reclaim --> + * + * <------------------------ Original Total Blocks -----------------------> + * + */ + if ((reclaimblks + blkstomove) >= hfs_freeblks(hfsmp, 1)) { + error = ENOSPC; + goto out; + } + hfsmp->hfs_resize_filesmoved = 0; + hfsmp->hfs_resize_totalfiles = filecnt; + /* Now move any files that are in the way. */ for (i = 0; i < filecnt; ++i) { struct vnode * rvp; @@ -3296,28 +3398,127 @@ hfs_reclaimspace(struct hfsmount *hfsmp, u_long startblk) hfs_unlock(VTOC(vp)); vnode_put(vp); vp = NULL; + + ++hfsmp->hfs_resize_filesmoved; + + /* Report intermediate progress. 
*/ + if (filecnt > 100) { + int progress; + + progress = (i * 100) / filecnt; + if (progress > (lastprogress + 9)) { + printf("hfs_reclaimspace: %d%% done...\n", progress); + lastprogress = progress; + } + } } if (vp) { hfs_unlock(VTOC(vp)); vnode_put(vp); vp = NULL; } - - /* - * Note: this implementation doesn't handle overflow extents. - */ + if (hfsmp->hfs_resize_filesmoved != 0) { + printf("hfs_reclaimspace: relocated %d files on \"%s\"\n", + (int)hfsmp->hfs_resize_filesmoved, hfsmp->vcbVN); + } out: kmem_free(kernel_map, (vm_offset_t)iterator, sizeof(*iterator)); kmem_free(kernel_map, (vm_offset_t)cnidbufp, cnidbufsize); - /* On errors restore the roving allocation pointer. */ - if (error) { + /* + * Restore the roving allocation pointer on errors. + * (but only if we didn't move any files) + */ + if (error && hfsmp->hfs_resize_filesmoved == 0) { hfsmp->nextAllocation = saved_next_allocation; } return (error); } +/* + * Check if there are any overflow extents that overlap. + */ +static int +hfs_overlapped_overflow_extents(struct hfsmount *hfsmp, u_int32_t startblk, u_int32_t catblks, u_int32_t fileID, int rsrcfork) +{ + struct BTreeIterator * iterator = NULL; + struct FSBufferDescriptor btdata; + HFSPlusExtentRecord extrec; + HFSPlusExtentKey *extkeyptr; + FCB *fcb; + u_int32_t block; + u_int8_t forktype; + int overlapped = 0; + int i; + int error; + + forktype = rsrcfork ? 
0xFF : 0; + if (kmem_alloc(kernel_map, (vm_offset_t *)&iterator, sizeof(*iterator))) { + return (0); + } + bzero(iterator, sizeof(*iterator)); + extkeyptr = (HFSPlusExtentKey *)&iterator->key; + extkeyptr->keyLength = kHFSPlusExtentKeyMaximumLength; + extkeyptr->forkType = forktype; + extkeyptr->fileID = fileID; + extkeyptr->startBlock = catblks; + + btdata.bufferAddress = &extrec; + btdata.itemSize = sizeof(extrec); + btdata.itemCount = 1; + + fcb = VTOF(hfsmp->hfs_extents_vp); + + error = BTSearchRecord(fcb, iterator, &btdata, NULL, iterator); + while (error == 0) { + /* Stop when we encounter a different file. */ + if ((extkeyptr->fileID != fileID) || + (extkeyptr->forkType != forktype)) { + break; + } + /* + * Check if the file overlaps target space. + */ + for (i = 0; i < kHFSPlusExtentDensity; ++i) { + if (extrec[i].blockCount == 0) { + break; + } + block = extrec[i].startBlock + extrec[i].blockCount; + if (block >= startblk) { + overlapped = 1; + break; + } + } + /* Look for more records. */ + error = BTIterateRecord(fcb, kBTreeNextRecord, iterator, &btdata, NULL); + } + + kmem_free(kernel_map, (vm_offset_t)iterator, sizeof(*iterator)); + return (overlapped); +} + + +/* + * Calculate the progress of a file system resize operation. + */ +__private_extern__ +int +hfs_resize_progress(struct hfsmount *hfsmp, u_int32_t *progress) +{ + if ((hfsmp->hfs_flags & HFS_RESIZE_IN_PROGRESS) == 0) { + return (ENXIO); + } + + if (hfsmp->hfs_resize_totalfiles > 0) + *progress = (hfsmp->hfs_resize_filesmoved * 100) / hfsmp->hfs_resize_totalfiles; + else + *progress = 0; + + return (0); +} + + /* * Get file system attributes. 
*/ diff --git a/bsd/hfs/hfs_vnops.c b/bsd/hfs/hfs_vnops.c index 507bb153c..726dee5e0 100644 --- a/bsd/hfs/hfs_vnops.c +++ b/bsd/hfs/hfs_vnops.c @@ -1565,18 +1565,17 @@ hfs_removefile(struct vnode *dvp, struct vnode *vp, struct componentname *cnp, if ((cp->c_flag & C_HARDLINK) == 0 && (!dataforkbusy || !rsrcforkbusy)) { /* - * A ubc_setsize can cause a pagein here - * so we need to the drop cnode lock. Note - * that we still hold the truncate lock. + * A ubc_setsize can cause a pagein so defer it + * until after the cnode lock is dropped. The + * cnode lock cannot be dropped/reacquired here + * since we might already hold the journal lock. */ - hfs_unlock(cp); if (!dataforkbusy && cp->c_datafork->ff_blocks && !isbigfile) { - ubc_setsize(vp, 0); + cp->c_flag |= C_NEED_DATA_SETSIZE; } if (!rsrcforkbusy && rvp) { - ubc_setsize(rvp, 0); + cp->c_flag |= C_NEED_RSRC_SETSIZE; } - hfs_lock(cp, HFS_FORCE_LOCK); } else { struct cat_desc cndesc; @@ -1890,10 +1889,10 @@ hfs_removefile(struct vnode *dvp, struct vnode *vp, struct componentname *cnp, __private_extern__ void replace_desc(struct cnode *cp, struct cat_desc *cdp) { - if (&cp->c_desc == cdp) { - return; - } - + if (&cp->c_desc == cdp) { + return; + } + /* First release allocated name buffer */ if (cp->c_desc.cd_flags & CD_HASBUF && cp->c_desc.cd_nameptr != 0) { char *name = cp->c_desc.cd_nameptr; @@ -2829,7 +2828,7 @@ hfs_update(struct vnode *vp, __unused int waitfor) * we have to do the update. 
*/ if (ISSET(cp->c_flag, C_FORCEUPDATE) == 0 && - (ISSET(cp->c_flag, C_DELETED) || + (ISSET(cp->c_flag, C_DELETED) || (dataforkp && cp->c_datafork->ff_unallocblocks) || (rsrcforkp && cp->c_rsrcfork->ff_unallocblocks))) { // cp->c_flag &= ~(C_ACCESS | C_CHANGE | C_UPDATE); @@ -3713,7 +3712,6 @@ struct vnodeopv_entry_desc hfs_specop_entries[] = { { &vnop_pathconf_desc, (VOPFUNC)spec_pathconf }, /* pathconf */ { &vnop_advlock_desc, (VOPFUNC)err_advlock }, /* advlock */ { &vnop_bwrite_desc, (VOPFUNC)hfs_vnop_bwrite }, - { &vnop_devblocksize_desc, (VOPFUNC)spec_devblocksize }, /* devblocksize */ { &vnop_pagein_desc, (VOPFUNC)hfs_vnop_pagein }, /* Pagein */ { &vnop_pageout_desc, (VOPFUNC)hfs_vnop_pageout }, /* Pageout */ { &vnop_copyfile_desc, (VOPFUNC)err_copyfile }, /* copyfile */ diff --git a/bsd/hfs/hfscommon/BTree/BTreeNodeReserve.c b/bsd/hfs/hfscommon/BTree/BTreeNodeReserve.c index 7baf03fb4..00874f229 100644 --- a/bsd/hfs/hfscommon/BTree/BTreeNodeReserve.c +++ b/bsd/hfs/hfscommon/BTree/BTreeNodeReserve.c @@ -91,11 +91,9 @@ BTReserveSetup() nr_hashtbl = hashinit(NR_CACHE, M_HFSMNT, &nr_hashmask); nr_lck_grp_attr= lck_grp_attr_alloc_init(); - lck_grp_attr_setstat(nr_lck_grp_attr); nr_lck_grp = lck_grp_alloc_init("btree_node_reserve", nr_lck_grp_attr); nr_lck_attr = lck_attr_alloc_init(); - lck_attr_setdebug(nr_lck_attr); lck_mtx_init(&nr_mutex, nr_lck_grp, nr_lck_attr); } diff --git a/bsd/i386/_types.h b/bsd/i386/_types.h index 2a69df571..985c42769 100644 --- a/bsd/i386/_types.h +++ b/bsd/i386/_types.h @@ -33,7 +33,7 @@ typedef __signed char __int8_t; typedef char __int8_t; #endif /* !__GNUC__ */ typedef unsigned char __uint8_t; -typedef unsigned short __int16_t; +typedef short __int16_t; typedef unsigned short __uint16_t; typedef int __int32_t; typedef unsigned int __uint32_t; diff --git a/bsd/i386/param.h b/bsd/i386/param.h index 6be5ae90a..f86c1351f 100644 --- a/bsd/i386/param.h +++ b/bsd/i386/param.h @@ -81,7 +81,7 @@ #define DEV_BSIZE 512 #define DEV_BSHIFT 9 
/* log2(DEV_BSIZE) */ #define BLKDEV_IOSIZE 2048 -#define MAXPHYS (64 * 1024) /* max raw I/O transfer size */ +#define MAXPHYS (128 * 1024) /* max raw I/O transfer size */ #define CLSIZE 1 #define CLSIZELOG2 0 diff --git a/bsd/i386/reg.h b/bsd/i386/reg.h index 35bb1130d..7ac3d8ef9 100644 --- a/bsd/i386/reg.h +++ b/bsd/i386/reg.h @@ -31,21 +31,6 @@ #ifndef _BSD_I386_REG_H_ #define _BSD_I386_REG_H_ -/* FIXME - should include mach/i386/thread_status.h and - construct the values from i386_saved_state - */ -#define EDX 9 -#define ECX 10 -#define EAX 11 -#define EIP 14 -#define EFL 16 -#define ESP 7 -#define UESP 17 -#define PS EFL -#define PC EIP -#define SP UESP - - #endif /* _BSD_I386_REG_H_ */ diff --git a/bsd/i386/setjmp.h b/bsd/i386/setjmp.h index 4d7ba1573..a9d64e054 100644 --- a/bsd/i386/setjmp.h +++ b/bsd/i386/setjmp.h @@ -31,6 +31,20 @@ #include #include + +#if defined(__x86_64__) +/* + * _JBLEN is number of ints required to save the following: + * rflags, rip, rbp, rsp, rbx, r12, r13, r14, r15... these are 8 bytes each + * mxcsr, fp control word, sigmask... these are 4 bytes each + * add 16 ints for future expansion needs... + */ +#define _JBLEN ((9 * 2) + 3 + 16) +typedef int jmp_buf[_JBLEN]; +typedef int sigjmp_buf[_JBLEN + 1]; + +#else + /* * _JBLEN is number of ints required to save the following: * eax, ebx, ecx, edx, edi, esi, ebp, esp, ss, eflags, eip, @@ -48,6 +62,7 @@ typedef int jmp_buf[_JBLEN]; typedef int sigjmp_buf[_JBLEN + 1]; #endif +#endif __BEGIN_DECLS extern int setjmp(jmp_buf env); diff --git a/bsd/i386/types.h b/bsd/i386/types.h index ab1c10837..d3841bcce 100644 --- a/bsd/i386/types.h +++ b/bsd/i386/types.h @@ -88,7 +88,11 @@ typedef long long int64_t; #endif typedef unsigned long long u_int64_t; +#if __LP64__ +typedef int64_t register_t; +#else typedef int32_t register_t; +#endif #ifndef _INTPTR_T #define _INTPTR_T @@ -102,14 +106,17 @@ typedef unsigned long int uintptr_t; /* These types are used for reserving the largest possible size. 
*/ // LP64todo - typedef mach_vm_address_t user_addr_t; /* varying length pointers from user space */ // LP64todo - typedef mach_vm_size_t user_size_t; /* varying length values from user space (unsigned) */ -typedef u_int32_t user_addr_t; -typedef u_int32_t user_size_t; -typedef int32_t user_ssize_t; -typedef int32_t user_long_t; -typedef u_int32_t user_ulong_t; -typedef int32_t user_time_t; +typedef u_int64_t user_addr_t; +typedef u_int64_t user_size_t; +typedef int64_t user_ssize_t; +typedef int64_t user_long_t; +typedef u_int64_t user_ulong_t; +typedef int64_t user_time_t; #define USER_ADDR_NULL ((user_addr_t) 0) -#define CAST_USER_ADDR_T(a_ptr) ((user_addr_t)(a_ptr)) +#define CAST_USER_ADDR_T(a_ptr) ((user_addr_t)((uintptr_t)(a_ptr))) + +/* This defines the size of syscall arguments after copying into the kernel: */ +typedef u_int64_t syscall_arg_t; #ifndef __offsetof #define __offsetof(type, field) ((size_t)(&((type *)0)->field)) diff --git a/bsd/i386/ucontext.h b/bsd/i386/ucontext.h index 7f7a04474..5f3f5cba5 100644 --- a/bsd/i386/ucontext.h +++ b/bsd/i386/ucontext.h @@ -27,20 +27,29 @@ #include #include -#ifdef __APPLE_API_UNSTABLE -/* WARNING: THIS WILL CHANGE; DO NOT COUNT ON THIS */ -/* Needs to be finalized as to what it should contain */ #ifndef _POSIX_C_SOURCE struct mcontext #else /* _POSIX_C_SOURCE */ struct __darwin_mcontext #endif /* _POSIX_C_SOURCE */ { - struct sigcontext sc; +#if __LP64__ + x86_exception_state64_t es; + x86_thread_state64_t ss; + x86_float_state64_t fs; +#else + x86_exception_state32_t es; + x86_thread_state32_t ss; + x86_float_state32_t fs; +#endif }; #ifndef _POSIX_C_SOURCE -#define I386_MCONTEXT_SIZE sizeof(struct mcontext) +#if __LP64__ +#define I386_MCONTEXT_SIZE (x86_THREAD_STATE64_COUNT + x86_FLOAT_STATE64_COUNT + x86_EXCEPTION_STATE64_COUNT) * sizeof(int) +#else +#define I386_MCONTEXT_SIZE (x86_THREAD_STATE32_COUNT + x86_FLOAT_STATE32_COUNT + x86_EXCEPTION_STATE32_COUNT) * sizeof(int) +#endif #endif /* _POSIX_C_SOURCE */ 
#ifndef _MCONTEXT_T @@ -48,21 +57,20 @@ struct __darwin_mcontext typedef __darwin_mcontext_t mcontext_t; #endif -#ifndef _POSIX_C_SOURCE -struct mcontext64 -{ - struct sigcontext sc; +#ifdef XNU_KERNEL_PRIVATE +struct mcontext64 { + x86_exception_state64_t es; + x86_thread_state64_t ss; + x86_float_state64_t fs; }; -#define I386_MCONTEXT64_SIZE sizeof(struct mcontext64) -#ifndef _MCONTEXT64_T -#define _MCONTEXT64_T -typedef struct mcontext64 * mcontext64_t; +struct mcontext32 { + x86_exception_state32_t es; + x86_thread_state32_t ss; + x86_float_state32_t fs; +}; #endif -#endif /* _POSIX_C_SOURCE */ - -#endif /* __APPLE_API_UNSTABLE */ #endif /* _I386_UCONTEXT_H_ */ diff --git a/bsd/i386/vmparam.h b/bsd/i386/vmparam.h index ffb77f55b..4e9cc2cff 100644 --- a/bsd/i386/vmparam.h +++ b/bsd/i386/vmparam.h @@ -25,7 +25,10 @@ #include -#define USRSTACK (0xC0000000) +/* Rosetta dependency on this address */ +#define USRSTACK VM_USRSTACK32 + +#define USRSTACK64 VM_USRSTACK64 /* * Virtual memory related constants, all in bytes @@ -37,10 +40,10 @@ #define MAXDSIZ (RLIM_INFINITY) /* max data size */ #endif #ifndef DFLSSIZ -#define DFLSSIZ (8*1024*1024 - 7*4*1024) /* initial stack size limit */ +#define DFLSSIZ (8*1024*1024) /* initial stack size limit */ #endif #ifndef MAXSSIZ -#define MAXSSIZ (64*1024*1024 - 7*4*1024) /* max stack size */ +#define MAXSSIZ (64*1024*1024) /* max stack size */ #endif #ifndef DFLCSIZ #define DFLCSIZ (0) /* initial core size limit */ diff --git a/bsd/isofs/cd9660/cd9660_lookup.c b/bsd/isofs/cd9660/cd9660_lookup.c index 2ecf2568e..3498811ae 100644 --- a/bsd/isofs/cd9660/cd9660_lookup.c +++ b/bsd/isofs/cd9660/cd9660_lookup.c @@ -191,9 +191,6 @@ cd9660_lookup(struct vnop_lookup_args *ap) !((len == 1 && *name == '.') || (flags & ISDOTDOT))) { int flags1 = UTF_PRECOMPOSED; - if (BYTE_ORDER != BIG_ENDIAN) - flags1 |= UTF_REVERSE_ENDIAN; - (void) utf8_decodestr(name, len, (u_int16_t*) altname, &altlen, sizeof(altname), 0, flags1); name = altname; diff 
--git a/bsd/isofs/cd9660/cd9660_mount.h b/bsd/isofs/cd9660/cd9660_mount.h index c673db1b1..94318f4a0 100644 --- a/bsd/isofs/cd9660/cd9660_mount.h +++ b/bsd/isofs/cd9660/cd9660_mount.h @@ -92,21 +92,13 @@ struct iso_args { */ /* LP64todo - should this move? */ -#if __DARWIN_ALIGN_NATURAL -#pragma options align=natural -#endif - struct user_iso_args { int flags; /* mounting flags, see below */ int ssector; /* starting sector, 0 for 1st session */ int toc_length; /* Size of *toc, including the toc.length field */ - user_addr_t toc; + user_addr_t toc __attribute((aligned(8))); }; -#if __DARWIN_ALIGN_NATURAL -#pragma options align=reset -#endif - #endif /* KERNEL */ #endif /* __APPLE_API_UNSTABLE */ diff --git a/bsd/isofs/cd9660/cd9660_util.c b/bsd/isofs/cd9660/cd9660_util.c index a858d848b..89e0cd57a 100644 --- a/bsd/isofs/cd9660/cd9660_util.c +++ b/bsd/isofs/cd9660/cd9660_util.c @@ -85,7 +85,6 @@ #include #include #include -#include #include #include @@ -156,6 +155,10 @@ isofncmp(u_char *fn, int fnlen, u_char *isofn, int isolen) /* * translate and compare a UCS-2 filename * Note: Version number plus ';' may be omitted. + * + * The name pointed to by "fn" is the search name, whose characters are + * in native endian order. The name "ucsfn" is the on-disk name, whose + * characters are in big endian order. 
*/ int @@ -171,7 +174,7 @@ ucsfncmp(u_int16_t *fn, int fnlen, u_int16_t *ucsfn, int ucslen) while (--fnlen >= 0) { if (--ucslen < 0) return *fn; - if ((c = *ucsfn++) == UCS_SEPARATOR2) { + if ((c = OSSwapBigToHostInt16(*ucsfn++)) == UCS_SEPARATOR2) { switch (*fn++) { default: return *--fn; @@ -185,7 +188,7 @@ ucsfncmp(u_int16_t *fn, int fnlen, u_int16_t *ucsfn, int ucslen) return -1; } } - for (j = 0; --ucslen >= 0; j = j * 10 + *ucsfn++ - '0'); + for (j = 0; --ucslen >= 0; j = j * 10 + OSSwapBigToHostInt16(*ucsfn++) - '0'); return i - j; } if (c != *fn) @@ -196,10 +199,10 @@ ucsfncmp(u_int16_t *fn, int fnlen, u_int16_t *ucsfn, int ucslen) switch (*ucsfn) { default: return -1; - case UCS_SEPARATOR1: - if (ucsfn[1] != UCS_SEPARATOR2) + case OSSwapHostToBigConstInt16(UCS_SEPARATOR1): + if (ucsfn[1] != OSSwapHostToBigConstInt16(UCS_SEPARATOR2)) return -1; - case UCS_SEPARATOR2: + case OSSwapHostToBigConstInt16(UCS_SEPARATOR2): return 0; } } @@ -286,9 +289,9 @@ ucsfntrans(u_int16_t *infn, int infnlen, u_char *outfn, u_short *outfnlen, /* strip file version number */ for (fnidx--; fnidx > 0; fnidx--) { /* stop when ';' is found */ - if (infn[fnidx] == UCS_SEPARATOR2) { + if (infn[fnidx] == OSSwapHostToBigConstInt16(UCS_SEPARATOR2)) { /* drop dangling dot */ - if (fnidx > 0 && infn[fnidx-1] == UCS_SEPARATOR1) + if (fnidx > 0 && infn[fnidx-1] == OSSwapHostToBigConstInt16(UCS_SEPARATOR1)) fnidx--; break; } diff --git a/bsd/isofs/cd9660/cd9660_vfsops.c b/bsd/isofs/cd9660/cd9660_vfsops.c index f026c811f..107bdc848 100644 --- a/bsd/isofs/cd9660/cd9660_vfsops.c +++ b/bsd/isofs/cd9660/cd9660_vfsops.c @@ -79,7 +79,6 @@ #include #include #include -#include #include #include @@ -932,7 +931,7 @@ cd9660_fhtovp(mount_t mp, int fhlen, unsigned char *fhp, vnode_t *vpp, vfs_conte ifhp->ifid_ino, ifhp->ifid_start); #endif - if ( (error = VFS_VGET(mp, (ino64_t)ifhp->ifid_ino, &nvp, context)) ) { + if ( (error = VFS_VGET(mp, (ino64_t)ntohl(ifhp->ifid_ino), &nvp, context)) ) { *vpp = 
NULLVP; return (error); } @@ -1592,8 +1591,8 @@ cd9660_vptofh(struct vnode *vp, int *fhlenp, unsigned char *fhp, __unused vfs_co ifhp = (struct ifid *)fhp; - ifhp->ifid_ino = ip->i_number; - ifhp->ifid_start = ip->iso_start; + ifhp->ifid_ino = htonl(ip->i_number); + ifhp->ifid_start = htonl(ip->iso_start); *fhlenp = sizeof(struct ifid); #ifdef ISOFS_DBG diff --git a/bsd/isofs/cd9660/cd9660_vnops.c b/bsd/isofs/cd9660/cd9660_vnops.c index 6789bfc1b..b8994eb2f 100644 --- a/bsd/isofs/cd9660/cd9660_vnops.c +++ b/bsd/isofs/cd9660/cd9660_vnops.c @@ -93,7 +93,7 @@ #include #include #include -#include +#include #include #include /* kmem_alloc, kmem_free */ @@ -975,12 +975,12 @@ cd9660_xa_init(struct iso_node *ip, struct iso_directory_record *isodir) sectors = ip->i_size / 2048; strncpy(header->riff, "RIFF", 4); - header->fileSize = NXSwapHostLongToLittle(sectors * CDXA_SECTOR_SIZE + sizeof(struct riff_header) - 8); + header->fileSize = OSSwapHostToLittleInt32(sectors * CDXA_SECTOR_SIZE + sizeof(struct riff_header) - 8); strncpy(header->cdxa, "CDXA", 4); strncpy(header->fmt, "fmt ", 4); - header->fmtSize = NXSwapHostLongToLittle(16); + header->fmtSize = OSSwapHostToLittleConstInt32(16); strncpy(header->data, "data", 4); - header->dataSize = NXSwapHostLongToLittle(sectors * CDXA_SECTOR_SIZE); + header->dataSize = OSSwapHostToLittleInt32(sectors * CDXA_SECTOR_SIZE); /* * Copy the CD-ROM XA extended directory information into the header. 
As far as @@ -1361,7 +1361,6 @@ struct vnodeopv_entry_desc cd9660_specop_entries[] = { { &vnop_pathconf_desc, (VOPFUNC)spec_pathconf }, /* pathconf */ { &vnop_advlock_desc, (VOPFUNC)spec_advlock }, /* advlock */ { &vnop_bwrite_desc, (VOPFUNC)vn_bwrite }, - { &vnop_devblocksize_desc, (VOPFUNC)spec_devblocksize }, /* devblocksize */ { &vnop_pagein_desc, (VOPFUNC)cd9660_pagein }, /* Pagein */ { &vnop_pageout_desc, (VOPFUNC)cd9660_pageout }, /* Pageout */ { &vnop_blktooff_desc, (VOPFUNC)cd9660_blktooff }, /* blktooff */ diff --git a/bsd/kern/bsd_init.c b/bsd/kern/bsd_init.c index a0d66765f..05b66d530 100644 --- a/bsd/kern/bsd_init.c +++ b/bsd/kern/bsd_init.c @@ -117,6 +117,8 @@ #include +#include + extern int app_profile; /* on/off switch for pre-heat cache */ char copyright[] = @@ -156,9 +158,13 @@ char hostname[MAXHOSTNAMELEN]; int hostnamelen; char domainname[MAXDOMNAMELEN]; int domainnamelen; -char classichandler[32] = {0}; -uint32_t classichandler_fsid = -1L; -long classichandler_fileid = -1L; +#if __i386__ +struct exec_archhandler exec_archhandler_ppc = { + .path = "/usr/libexec/oah/translate", +}; +#else /* __i386__ */ +struct exec_archhandler exec_archhandler_ppc; +#endif /* __i386__ */ char rootdevice[16]; /* hfs device names have at least 9 chars */ @@ -176,6 +182,7 @@ vm_map_t mb_map; semaphore_t execve_semaphore; int cmask = CMASK; +extern int customnbuf; int parse_bsd_args(void); extern int bsd_hardclockinit; @@ -193,6 +200,12 @@ extern void sysv_sem_lock_init(void); extern void sysv_msg_lock_init(void); extern void pshm_lock_init(); extern void psem_lock_init(); +extern int maxprocperuid; + +/* kmem access not enabled by default; can be changed with boot-args */ +int setup_kmem = 0; + +extern void stackshot_lock_init(); /* * Initialization code. 
@@ -261,6 +274,7 @@ bsd_init() extern kauth_cred_t rootcred; register int i; int s; + int error; thread_t th; struct vfs_context context; void lightning_bolt(void ); @@ -302,17 +316,13 @@ bsd_init() /* give kernproc a name */ process_name("kernel_task", p); - /* allocate proc lock group attribute and group */ proc_lck_grp_attr= lck_grp_attr_alloc_init(); - lck_grp_attr_setstat(proc_lck_grp_attr); - + proc_lck_grp = lck_grp_alloc_init("proc", proc_lck_grp_attr); - /* Allocate proc lock attribute */ proc_lck_attr = lck_attr_alloc_init(); - //lck_attr_setdebug(proc_lck_attr); lck_mtx_init(&p->p_mlock, proc_lck_grp, proc_lck_attr); lck_mtx_init(&p->p_fdmlock, proc_lck_grp, proc_lck_attr); @@ -384,7 +394,7 @@ bsd_init() limit0.pl_rlimit[i].rlim_cur = limit0.pl_rlimit[i].rlim_max = RLIM_INFINITY; limit0.pl_rlimit[RLIMIT_NOFILE].rlim_cur = NOFILE; - limit0.pl_rlimit[RLIMIT_NPROC].rlim_cur = MAXUPRC; + limit0.pl_rlimit[RLIMIT_NPROC].rlim_cur = maxprocperuid; limit0.pl_rlimit[RLIMIT_NPROC].rlim_max = maxproc; limit0.pl_rlimit[RLIMIT_STACK] = vm_initial_limit_stack; limit0.pl_rlimit[RLIMIT_DATA] = vm_initial_limit_data; @@ -473,6 +483,8 @@ bsd_init() psem_cache_init(); time_zone_slock_init(); + /* Stack snapshot facility lock */ + stackshot_lock_init(); /* * Initialize protocols. Block reception of incoming packets * until everything is ready. 
@@ -584,6 +596,13 @@ bsd_init() bsd_utaskbootstrap(); +#if __i386__ + // this should be done after the root filesystem is mounted + error = set_archhandler(p, CPU_TYPE_POWERPC); + if (error) + exec_archhandler_ppc.path[0] = 0; +#endif + /* invoke post-root-mount hook */ if (mountroot_post_hook != NULL) mountroot_post_hook(); @@ -624,7 +643,6 @@ bsdinit_task(void) ut = (uthread_t)get_bsdthread_info(th_act); - ut->uu_ar0 = (void *)get_user_regs(th_act); bsd_hardclockinit = 1; /* Start bsd hardclock */ bsd_init_task = get_threadtask(th_act); @@ -794,7 +812,9 @@ parse_bsd_args() PE_parse_boot_arg("srv", &srv); PE_parse_boot_arg("ncl", &ncl); - PE_parse_boot_arg("nbuf", &nbuf); + if (PE_parse_boot_arg("nbuf", &max_nbuf_headers)) + customnbuf = 1; + PE_parse_boot_arg("kmem", &setup_kmem); return 0; } diff --git a/bsd/kern/bsd_stubs.c b/bsd/kern/bsd_stubs.c index 9a0c06054..0e885c2ae 100644 --- a/bsd/kern/bsd_stubs.c +++ b/bsd/kern/bsd_stubs.c @@ -31,6 +31,8 @@ #include #include /* for SET */ #include +#include +#include /* Just to satisfy pstat command */ int dmmin, dmmax, dmtext; @@ -41,7 +43,7 @@ kmem_mb_alloc(vm_map_t mbmap, int size) vm_offset_t addr; if (kernel_memory_allocate(mbmap, &addr, size, 0, - KMA_NOPAGEWAIT|KMA_KOBJECT) == KERN_SUCCESS) + KMA_NOPAGEWAIT|KMA_KOBJECT|KMA_LOMEM) == KERN_SUCCESS) return(addr); else return(0); @@ -292,3 +294,66 @@ tbeproc(void *procp) return; } + +/* + * WARNING - this is a temporary workaround for binary compatibility issues + * with anti-piracy software that relies on patching ptrace (3928003). + * This KPI will be removed in the system release after Tiger. 
+ */ +uintptr_t temp_patch_ptrace(uintptr_t new_ptrace) +{ + struct sysent * callp; + sy_call_t * old_ptrace; +#ifndef __ppc__ + boolean_t funnel_state; +#endif + + if (new_ptrace == 0) + return(0); + +#ifdef __ppc__ + enter_funnel_section(kernel_flock); +#else + funnel_state = thread_funnel_set(kernel_flock, TRUE); +#endif + callp = &sysent[26]; + old_ptrace = callp->sy_call; + + /* only allow one patcher of ptrace */ + if (old_ptrace == (sy_call_t *) ptrace) { + callp->sy_call = (sy_call_t *) new_ptrace; + } + else { + old_ptrace = NULL; + } +#ifdef __ppc__ + exit_funnel_section( ); +#else + (void)thread_funnel_set(kernel_flock, funnel_state); +#endif + + return((uintptr_t)old_ptrace); +} + +void temp_unpatch_ptrace(void) +{ + struct sysent * callp; +#ifndef __ppc__ + boolean_t funnel_state; +#endif + +#ifdef __ppc__ + enter_funnel_section(kernel_flock); +#else + funnel_state = thread_funnel_set(kernel_flock, TRUE); +#endif + callp = &sysent[26]; + callp->sy_call = (sy_call_t *) ptrace; +#ifdef __ppc__ + exit_funnel_section( ); +#else + (void)thread_funnel_set(kernel_flock, funnel_state); +#endif + + return; +} diff --git a/bsd/kern/init_sysent.c b/bsd/kern/init_sysent.c index 3a78e1d3e..f4efae621 100644 --- a/bsd/kern/init_sysent.c +++ b/bsd/kern/init_sysent.c @@ -32,11 +32,7 @@ #include #include #include -#ifdef __ppc__ #define AC(name) (sizeof(struct name) / sizeof(uint64_t)) -#else -#define AC(name) (sizeof(struct name) / sizeof(register_t)) -#endif /* The casts are bogus but will do for now. 
*/ @@ -148,11 +144,7 @@ __private_extern__ struct sysent sysent[] = { {AC(getpriority_args), _SYSCALL_CANCEL_NONE, KERNEL_FUNNEL, (sy_call_t *)getpriority, munge_ww, munge_dd, _SYSCALL_RET_INT_T}, /* 100 = getpriority */ {0, _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T}, /* 101 = nosys old send */ {0, _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T}, /* 102 = nosys old recv */ -#ifdef __ppc__ {0, _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T}, /* 103 = nosys old sigreturn */ -#else - {AC(sigreturn_args), _SYSCALL_CANCEL_NONE, KERNEL_FUNNEL|UNSAFE_64BIT, (sy_call_t *)sigreturn, munge_w, munge_d, _SYSCALL_RET_INT_T}, /* 103 = sigreturn */ -#endif {AC(bind_args), _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)bind, munge_www, munge_ddd, _SYSCALL_RET_INT_T}, /* 104 = bind */ {AC(setsockopt_args), _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)setsockopt, munge_wwwww, munge_ddddd, _SYSCALL_RET_INT_T}, /* 105 = setsockopt */ {AC(listen_args), _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)listen, munge_ww, munge_dd, _SYSCALL_RET_INT_T}, /* 106 = listen */ @@ -165,11 +157,7 @@ __private_extern__ struct sysent sysent[] = { {0, _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T}, /* 113 = nosys old recvmsg */ {0, _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T}, /* 114 = nosys old sendmsg */ {0, _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T}, /* 115 = nosys old vtrace */ -#ifdef __ppc__ - {AC(ppc_gettimeofday_args), _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)ppc_gettimeofday, munge_ww, munge_dd, _SYSCALL_RET_INT_T}, /* 116 = ppc_gettimeofday */ -#else {AC(gettimeofday_args), _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)gettimeofday, munge_ww, munge_dd, _SYSCALL_RET_INT_T}, /* 116 = gettimeofday */ -#endif {AC(getrusage_args), _SYSCALL_CANCEL_NONE, 
KERNEL_FUNNEL, (sy_call_t *)getrusage, munge_ww, munge_dd, _SYSCALL_RET_INT_T}, /* 117 = getrusage */ {AC(getsockopt_args), _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)getsockopt, munge_wwwww, munge_ddddd, _SYSCALL_RET_INT_T}, /* 118 = getsockopt */ {0, _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T}, /* 119 = nosys old resuba */ @@ -245,12 +233,8 @@ __private_extern__ struct sysent sysent[] = { {AC(setgid_args), _SYSCALL_CANCEL_NONE, KERNEL_FUNNEL, (sy_call_t *)setgid, munge_w, munge_d, _SYSCALL_RET_INT_T}, /* 181 = setgid */ {AC(setegid_args), _SYSCALL_CANCEL_NONE, KERNEL_FUNNEL, (sy_call_t *)setegid, munge_w, munge_d, _SYSCALL_RET_INT_T}, /* 182 = setegid */ {AC(seteuid_args), _SYSCALL_CANCEL_NONE, KERNEL_FUNNEL, (sy_call_t *)seteuid, munge_w, munge_d, _SYSCALL_RET_INT_T}, /* 183 = seteuid */ -#ifdef __ppc__ {AC(sigreturn_args), _SYSCALL_CANCEL_NONE, KERNEL_FUNNEL, (sy_call_t *)sigreturn, munge_ww, munge_dd, _SYSCALL_RET_INT_T}, /* 184 = sigreturn */ -#else - {0, _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T}, /* 184 = nosys */ -#endif - {0, _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T}, /* 185 = nosys */ + {AC(chud_args), _SYSCALL_CANCEL_NONE, NO_FUNNEL|UNSAFE_64BIT, (sy_call_t *)chud, munge_wwwwww, munge_dddddd, _SYSCALL_RET_INT_T}, /* 185 = chud */ {0, _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T}, /* 186 = nosys */ {0, _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T}, /* 187 = nosys */ {AC(stat_args), _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)stat, munge_ww, munge_dd, _SYSCALL_RET_INT_T}, /* 188 = stat */ @@ -275,7 +259,6 @@ __private_extern__ struct sysent sysent[] = { {AC(mlock_args), _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)mlock, munge_ww, munge_dd, _SYSCALL_RET_INT_T}, /* 203 = mlock */ {AC(munlock_args), _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t 
*)munlock, munge_ww, munge_dd, _SYSCALL_RET_INT_T}, /* 204 = munlock */ {AC(undelete_args), _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)undelete, munge_w, munge_d, _SYSCALL_RET_INT_T}, /* 205 = undelete */ -#ifdef __ppc__ {AC(ATsocket_args), _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)ATsocket, munge_w, munge_d, _SYSCALL_RET_INT_T}, /* 206 = ATsocket */ {AC(ATgetmsg_args), _SYSCALL_CANCEL_NONE, NO_FUNNEL|UNSAFE_64BIT, (sy_call_t *)ATgetmsg, munge_wwww, munge_dddd, _SYSCALL_RET_INT_T}, /* 207 = ATgetmsg */ {AC(ATputmsg_args), _SYSCALL_CANCEL_NONE, NO_FUNNEL|UNSAFE_64BIT, (sy_call_t *)ATputmsg, munge_wwww, munge_dddd, _SYSCALL_RET_INT_T}, /* 208 = ATputmsg */ @@ -284,16 +267,6 @@ __private_extern__ struct sysent sysent[] = { {AC(ATPgetreq_args), _SYSCALL_CANCEL_NONE, NO_FUNNEL|UNSAFE_64BIT, (sy_call_t *)ATPgetreq, munge_www, munge_ddd, _SYSCALL_RET_INT_T}, /* 211 = ATPgetreq */ {AC(ATPgetrsp_args), _SYSCALL_CANCEL_NONE, NO_FUNNEL|UNSAFE_64BIT, (sy_call_t *)ATPgetrsp, munge_ww, munge_dd, _SYSCALL_RET_INT_T}, /* 212 = ATPgetrsp */ {0, _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T}, /* 213 = nosys Reserved for AppleTalk */ -#else - {0, _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_NONE}, /* 206 = ATsocket */ - {0, _SYSCALL_CANCEL_NONE, NO_FUNNEL|UNSAFE_64BIT, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_NONE}, /* 207 = ATgetmsg */ - {0, _SYSCALL_CANCEL_NONE, NO_FUNNEL|UNSAFE_64BIT, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_NONE}, /* 208 = ATputmsg */ - {0, _SYSCALL_CANCEL_NONE, NO_FUNNEL|UNSAFE_64BIT, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_NONE}, /* 209 = ATPsndreq */ - {0, _SYSCALL_CANCEL_NONE, NO_FUNNEL|UNSAFE_64BIT, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_NONE}, /* 210 = ATPsndrsp */ - {0, _SYSCALL_CANCEL_NONE, NO_FUNNEL|UNSAFE_64BIT, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_NONE}, /* 211 = ATPgetreq */ - {0, _SYSCALL_CANCEL_NONE, NO_FUNNEL|UNSAFE_64BIT, (sy_call_t *)nosys, 
NULL, NULL, _SYSCALL_RET_NONE}, /* 212 = ATPgetrsp */ - {0, _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T}, /* 213 = nosys Reserved for AppleTalk */ -#endif /* __ppc__ */ {AC(kqueue_from_portset_np_args), _SYSCALL_CANCEL_NONE, KERNEL_FUNNEL, (sy_call_t *)kqueue_from_portset_np, munge_w, munge_d, _SYSCALL_RET_INT_T}, /* 214 = kqueue_from_portset_np */ {AC(kqueue_portset_np_args), _SYSCALL_CANCEL_NONE, KERNEL_FUNNEL, (sy_call_t *)kqueue_portset_np, munge_w, munge_d, _SYSCALL_RET_INT_T}, /* 215 = kqueue_portset_np */ {0, _SYSCALL_CANCEL_NONE, NO_FUNNEL|UNSAFE_64BIT, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_NONE}, /* 216 = mkcomplex soon to be obsolete */ @@ -425,7 +398,7 @@ __private_extern__ struct sysent sysent[] = { {AC(__pthread_canceled_args), _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)__pthread_canceled, munge_w, munge_d, _SYSCALL_RET_INT_T}, /* 333 = __pthread_canceled */ {AC(__semwait_signal_args), _SYSCALL_CANCEL_POST, NO_FUNNEL, (sy_call_t *)__semwait_signal, munge_wwwwww, munge_dddddd, _SYSCALL_RET_INT_T}, /* 334 = __semwait_signal */ {AC(utrace_args), _SYSCALL_CANCEL_NONE, KERNEL_FUNNEL, (sy_call_t *)utrace, munge_ww, munge_dd, _SYSCALL_RET_INT_T}, /* 335 = utrace */ - {0, _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T}, /* 336 = nosys */ + {AC(proc_info_args), _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)proc_info, munge_wwwlww, munge_dddddd, _SYSCALL_RET_INT_T}, /* 336 = proc_info */ {0, _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T}, /* 337 = nosys */ {0, _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T}, /* 338 = nosys */ {0, _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T}, /* 339 = nosys */ @@ -454,7 +427,7 @@ __private_extern__ struct sysent sysent[] = { {0, _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)kqueue, NULL, NULL, _SYSCALL_RET_INT_T}, /* 362 = kqueue 
*/ {AC(kevent_args), _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)kevent, munge_wwwwww, munge_dddddd, _SYSCALL_RET_INT_T}, /* 363 = kevent */ {AC(lchown_args), _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)lchown, munge_www, munge_ddd, _SYSCALL_RET_INT_T}, /* 364 = lchown */ - {0, _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T}, /* 365 = nosys */ + {AC(stack_snapshot_args), _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)stack_snapshot, munge_wwww, munge_dddd, _SYSCALL_RET_INT_T}, /* 365 = stack_snapshot */ {0, _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T}, /* 366 = nosys */ {0, _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T}, /* 367 = nosys */ {0, _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T}, /* 368 = nosys */ diff --git a/bsd/kern/kdebug.c b/bsd/kern/kdebug.c index c72e598fb..286a9d8ce 100644 --- a/bsd/kern/kdebug.c +++ b/bsd/kern/kdebug.c @@ -24,6 +24,7 @@ #include #include +#include #include #include #include @@ -41,6 +42,38 @@ #include #include +#include +#include + +#include /* for host_info() */ +#include + +/* XXX should have prototypes, but Mach does not provide one */ +void task_act_iterate_wth_args(task_t, void(*)(thread_t, void *), void *); +int cpu_number(void); /* XXX include path broken */ + +/* XXX should probably be static, but it's debugging code... 
*/ +int kdbg_read(user_addr_t, size_t *); +void kdbg_control_chud(int, void *); +int kdbg_control(int *, u_int, user_addr_t, size_t *); +int kdbg_getentropy (user_addr_t, size_t *, int); +int kdbg_readmap(user_addr_t, size_t *); +int kdbg_getreg(kd_regtype *); +int kdbg_setreg(kd_regtype *); +int kdbg_setrtcdec(kd_regtype *); +int kdbg_setpidex(kd_regtype *); +int kdbg_setpid(kd_regtype *); +void kdbg_mapinit(void); +int kdbg_reinit(void); +int kdbg_bootstrap(void); + +static int create_buffers(void); +static void delete_buffers(void); + +#ifdef ppc +extern uint32_t maxDec; +#endif + /* trace enable status */ unsigned int kdebug_enable = 0; @@ -58,12 +91,27 @@ unsigned int kd_entropy_buftomem = 0; unsigned int kdebug_slowcheck=SLOW_NOLOG; -/* kd_buf kd_buffer[kd_bufsize/sizeof(kd_buf)]; */ -kd_buf * kd_bufptr; -unsigned int kd_buftomem=0; -kd_buf * kd_buffer=0; -kd_buf * kd_buflast; -kd_buf * kd_readlast; +unsigned int kd_cpus; + +struct kd_bufinfo { + kd_buf * kd_stop; + kd_buf * kd_bufptr; + kd_buf * kd_buffer; + kd_buf * kd_buflast; + kd_buf * kd_readlast; + int kd_wrapped; /* plus, the global flag KDBG_WRAPPED is set if one of the buffers has wrapped */ + uint64_t kd_prev_timebase; + int kd_pad[24]; /* pad out to 128 bytes so that no cache line is shared between CPUs */ + +}; + +struct kd_bufinfo *kdbip = NULL; + +#define KDCOPYBUF_COUNT 1024 +#define KDCOPYBUF_SIZE (KDCOPYBUF_COUNT * sizeof(kd_buf)) +kd_buf *kdcopybuf = NULL; + + unsigned int nkdbufs = 8192; unsigned int kd_bufsize = 0; unsigned int kdebug_flags = 0; @@ -74,17 +122,28 @@ unsigned int kdlog_value2=0; unsigned int kdlog_value3=0; unsigned int kdlog_value4=0; -unsigned long long kd_prev_timebase = 0LL; +static lck_mtx_t * kd_trace_mtx_sysctl; +static lck_grp_t * kd_trace_mtx_sysctl_grp; +static lck_attr_t * kd_trace_mtx_sysctl_attr; +static lck_grp_attr_t *kd_trace_mtx_sysctl_grp_attr; + +static lck_grp_t *stackshot_subsys_lck_grp; +static lck_grp_attr_t *stackshot_subsys_lck_grp_attr; +static 
lck_attr_t *stackshot_subsys_lck_attr; +static lck_mtx_t stackshot_subsys_mutex; + +void *stackshot_snapbuf = NULL; + +int +stack_snapshot2(pid_t pid, user_addr_t tracebuf, uint32_t tracebuf_size, uint32_t options, register_t *retval); -static lck_mtx_t * kd_trace_mtx; -static lck_grp_t * kd_trace_mtx_grp; -static lck_attr_t * kd_trace_mtx_attr; -static lck_grp_attr_t *kd_trace_mtx_grp_attr; +extern void +kdp_snapshot_preflight(int pid, void *tracebuf, uint32_t tracebuf_size, uint32_t options); -static lck_spin_t * kd_trace_lock; -static lck_grp_t * kd_trace_lock_grp; -static lck_attr_t * kd_trace_lock_attr; -static lck_grp_attr_t *kd_trace_lock_grp_attr; +extern int +kdp_stack_snapshot_geterror(void); +extern unsigned int +kdp_stack_snapshot_bytes_traced(void); kd_threadmap *kd_mapptr = 0; unsigned int kd_mapsize = 0; @@ -98,7 +157,7 @@ pid_t global_state_pid = -1; /* Used to control exclusive use of kd_buffer /* task to string structure */ struct tts { - task_t *task; /* from procs task */ + task_t task; /* from procs task */ pid_t pid; /* from procs p_pid */ char task_comm[20]; /* from procs p_comm */ }; @@ -122,12 +181,11 @@ typedef void (*kd_chudhook_fn) (unsigned int debugid, unsigned int arg1, kd_chudhook_fn kdebug_chudhook = 0; /* pointer to CHUD toolkit function */ +__private_extern__ void stackshot_lock_init( void ); /* Support syscall SYS_kdebug_trace */ -kdebug_trace(p, uap, retval) - struct proc *p; - struct kdebug_trace_args *uap; - register_t *retval; +int +kdebug_trace(__unused struct proc *p, struct kdebug_trace_args *uap, __unused register_t *retval) { if ( (kdebug_enable == 0) ) return(EINVAL); @@ -137,30 +195,92 @@ kdebug_trace(p, uap, retval) } -void -kernel_debug(debugid, arg1, arg2, arg3, arg4, arg5) -unsigned int debugid, arg1, arg2, arg3, arg4, arg5; + +static int +create_buffers(void) +{ + unsigned int cpu, i; + int nentries; + + nentries = nkdbufs / kd_cpus; + kd_bufsize = nentries * sizeof(kd_buf); + + bzero((char *)kdbip, sizeof(struct 
kd_bufinfo) * kd_cpus); + + if (kdcopybuf == 0) { + if (kmem_alloc(kernel_map, (unsigned int *)&kdcopybuf, (vm_size_t)KDCOPYBUF_SIZE) != KERN_SUCCESS) + return ENOMEM; + } + for (cpu = 0; cpu < kd_cpus; cpu++) { + if (kmem_alloc(kernel_map, (unsigned int *)&kdbip[cpu].kd_buffer, kd_bufsize) != KERN_SUCCESS) + break; + } + if (cpu < kd_cpus) { + for (i = 0; i < cpu; i++) + kmem_free(kernel_map, (vm_offset_t)kdbip[i].kd_buffer, kd_bufsize); + kd_bufsize = 0; + + kmem_free(kernel_map, (vm_offset_t)kdcopybuf, KDCOPYBUF_SIZE); + kdcopybuf = NULL; + + return(ENOMEM); + } + for (cpu = 0; cpu < kd_cpus; cpu++) { + kdbip[cpu].kd_bufptr = kdbip[cpu].kd_buffer; + kdbip[cpu].kd_buflast = &kdbip[cpu].kd_bufptr[nentries]; + kdbip[cpu].kd_readlast = kdbip[cpu].kd_bufptr; + } + kdebug_flags |= KDBG_BUFINIT; + + return(0); +} + + +static void +delete_buffers(void) { + unsigned int cpu; + + if (kd_bufsize && (kdebug_flags & KDBG_BUFINIT)) { + for (cpu = 0; cpu < kd_cpus; cpu++) + kmem_free(kernel_map, (vm_offset_t)kdbip[cpu].kd_buffer, kd_bufsize); + kd_bufsize = 0; + } + if (kdcopybuf) { + kmem_free(kernel_map, (vm_offset_t)kdcopybuf, KDCOPYBUF_SIZE); + kdcopybuf = NULL; + } + kdebug_flags &= ~KDBG_BUFINIT; +} + + +static void +kernel_debug_internal(unsigned int debugid, unsigned int arg1, unsigned int arg2, unsigned int arg3, + unsigned int arg4, unsigned int arg5, int entropy_flag) +{ + int s; kd_buf * kd; struct proc *curproc; - int s; unsigned long long now; + int cpu; + + s = ml_set_interrupts_enabled(FALSE); + now = mach_absolute_time() & KDBG_TIMESTAMP_MASK; + cpu = cpu_number(); if (kdebug_enable & KDEBUG_ENABLE_CHUD) { if (kdebug_chudhook) kdebug_chudhook(debugid, arg1, arg2, arg3, arg4, arg5); if ( !(kdebug_enable & (KDEBUG_ENABLE_ENTROPY | KDEBUG_ENABLE_TRACE))) - return; + goto out; } - s = ml_set_interrupts_enabled(FALSE); - lck_spin_lock(kd_trace_lock); if (kdebug_slowcheck == 0) goto record_trace; - if (kdebug_enable & KDEBUG_ENABLE_ENTROPY) + if (entropy_flag && 
(kdebug_enable & KDEBUG_ENABLE_ENTROPY)) { if (kd_entropy_indx < kd_entropy_count) { @@ -177,11 +297,7 @@ unsigned int debugid, arg1, arg2, arg3, arg4, arg5; } if ( (kdebug_slowcheck & SLOW_NOLOG) ) - { - lck_spin_unlock(kd_trace_lock); - ml_set_interrupts_enabled(s); - return; - } + goto out; if (kdebug_flags & KDBG_PIDCHECK) { @@ -189,11 +305,7 @@ unsigned int debugid, arg1, arg2, arg3, arg4, arg5; curproc = current_proc(); if ((curproc && !(curproc->p_flag & P_KDEBUG)) && ((debugid&0xffff0000) != (MACHDBG_CODE(DBG_MACH_SCHED, 0) | DBG_FUNC_NONE))) - { - lck_spin_unlock(kd_trace_lock); - ml_set_interrupts_enabled(s); - return; - } + goto out; } else if (kdebug_flags & KDBG_PIDEXCLUDE) { @@ -201,22 +313,14 @@ unsigned int debugid, arg1, arg2, arg3, arg4, arg5; curproc = current_proc(); if ((curproc && (curproc->p_flag & P_KDEBUG)) && ((debugid&0xffff0000) != (MACHDBG_CODE(DBG_MACH_SCHED, 0) | DBG_FUNC_NONE))) - { - lck_spin_unlock(kd_trace_lock); - ml_set_interrupts_enabled(s); - return; - } + goto out; } if (kdebug_flags & KDBG_RANGECHECK) { - if ((debugid < kdlog_beg) || (debugid >= kdlog_end) - && (debugid >> 24 != DBG_TRACE)) - { - lck_spin_unlock(kd_trace_lock); - ml_set_interrupts_enabled(s); - return; - } + if ((debugid < kdlog_beg) + || ((debugid >= kdlog_end) && (debugid >> 24 != DBG_TRACE))) + goto out; } else if (kdebug_flags & KDBG_VALCHECK) { @@ -225,234 +329,114 @@ unsigned int debugid, arg1, arg2, arg3, arg4, arg5; (debugid & DBG_FUNC_MASK) != kdlog_value3 && (debugid & DBG_FUNC_MASK) != kdlog_value4 && (debugid >> 24 != DBG_TRACE)) - { - lck_spin_unlock(kd_trace_lock); - ml_set_interrupts_enabled(s); - return; - } + goto out; } record_trace: - kd = kd_bufptr; + kd = kdbip[cpu].kd_bufptr; kd->debugid = debugid; kd->arg1 = arg1; kd->arg2 = arg2; kd->arg3 = arg3; kd->arg4 = arg4; - kd->arg5 = (int)current_thread(); + kd->arg5 = arg5; - now = mach_absolute_time() & KDBG_TIMESTAMP_MASK; - - /* Watch for out of order timestamps */ - - if (now < 
kd_prev_timebase) + /* + * Watch for out of order timestamps + */ + if (now < kdbip[cpu].kd_prev_timebase) { - now = ++kd_prev_timebase & KDBG_TIMESTAMP_MASK; + /* + * if so, just store the previous timestamp + a cycle + */ + now = ++kdbip[cpu].kd_prev_timebase & KDBG_TIMESTAMP_MASK; } else { - /* Then just store the previous timestamp */ - kd_prev_timebase = now; + kdbip[cpu].kd_prev_timebase = now; } - kd->timestamp = now | (((uint64_t)cpu_number()) << KDBG_CPU_SHIFT); + kd->timestamp = now | (((uint64_t)cpu) << KDBG_CPU_SHIFT); + + kdbip[cpu].kd_bufptr++; - kd_bufptr++; + if (kdbip[cpu].kd_bufptr >= kdbip[cpu].kd_buflast) + kdbip[cpu].kd_bufptr = kdbip[cpu].kd_buffer; - if (kd_bufptr >= kd_buflast) - kd_bufptr = kd_buffer; - if (kd_bufptr == kd_readlast) { + if (kdbip[cpu].kd_bufptr == kdbip[cpu].kd_readlast) { if (kdebug_flags & KDBG_NOWRAP) kdebug_slowcheck |= SLOW_NOLOG; + kdbip[cpu].kd_wrapped = 1; kdebug_flags |= KDBG_WRAPPED; } - lck_spin_unlock(kd_trace_lock); + +out: ml_set_interrupts_enabled(s); } void -kernel_debug1(debugid, arg1, arg2, arg3, arg4, arg5) -unsigned int debugid, arg1, arg2, arg3, arg4, arg5; +kernel_debug(unsigned int debugid, unsigned int arg1, unsigned int arg2, unsigned int arg3, + unsigned int arg4, __unused unsigned int arg5) { - kd_buf * kd; - struct proc *curproc; - int s; - unsigned long long now; - - if (kdebug_enable & KDEBUG_ENABLE_CHUD) { - if (kdebug_chudhook) - (void)kdebug_chudhook(debugid, arg1, arg2, arg3, arg4, arg5); + kernel_debug_internal(debugid, arg1, arg2, arg3, arg4, (int)current_thread(), 1); +} - if ( !(kdebug_enable & (KDEBUG_ENABLE_ENTROPY | KDEBUG_ENABLE_TRACE))) - return; - } - s = ml_set_interrupts_enabled(FALSE); - lck_spin_lock(kd_trace_lock); +void +kernel_debug1(unsigned int debugid, unsigned int arg1, unsigned int arg2, unsigned int arg3, + unsigned int arg4, unsigned int arg5) +{ + kernel_debug_internal(debugid, arg1, arg2, arg3, arg4, arg5, 0); +} - if (kdebug_slowcheck == 0) - goto record_trace1; 
+static void +kdbg_lock_init(void) +{ + host_basic_info_data_t hinfo; + mach_msg_type_number_t count = HOST_BASIC_INFO_COUNT; - if ( (kdebug_slowcheck & SLOW_NOLOG) ) - { - lck_spin_unlock(kd_trace_lock); - ml_set_interrupts_enabled(s); - return; - } - if (kdebug_flags & KDBG_PIDCHECK) - { - /* If kdebug flag is not set for current proc, return */ - curproc = current_proc(); - if ((curproc && !(curproc->p_flag & P_KDEBUG)) && - ((debugid&0xffff0000) != (MACHDBG_CODE(DBG_MACH_SCHED, 0) | DBG_FUNC_NONE))) - { - lck_spin_unlock(kd_trace_lock); - ml_set_interrupts_enabled(s); - return; - } - } - else if (kdebug_flags & KDBG_PIDEXCLUDE) - { - /* If kdebug flag is set for current proc, return */ - curproc = current_proc(); - if ((curproc && (curproc->p_flag & P_KDEBUG)) && - ((debugid&0xffff0000) != (MACHDBG_CODE(DBG_MACH_SCHED, 0) | DBG_FUNC_NONE))) - { - lck_spin_unlock(kd_trace_lock); - ml_set_interrupts_enabled(s); + if (kdebug_flags & KDBG_LOCKINIT) return; - } - } - if (kdebug_flags & KDBG_RANGECHECK) - { - if ((debugid < kdlog_beg) || (debugid >= kdlog_end) - && (debugid >> 24 != DBG_TRACE)) - { - lck_spin_unlock(kd_trace_lock); - ml_set_interrupts_enabled(s); - return; - } - } - else if (kdebug_flags & KDBG_VALCHECK) - { - if ((debugid & DBG_FUNC_MASK) != kdlog_value1 && - (debugid & DBG_FUNC_MASK) != kdlog_value2 && - (debugid & DBG_FUNC_MASK) != kdlog_value3 && - (debugid & DBG_FUNC_MASK) != kdlog_value4 && - (debugid >> 24 != DBG_TRACE)) - { - lck_spin_unlock(kd_trace_lock); - ml_set_interrupts_enabled(s); - return; - } - } + /* get the number of cpus and cache it */ +#define BSD_HOST 1 + host_info((host_t)BSD_HOST, HOST_BASIC_INFO, (host_info_t)&hinfo, &count); + kd_cpus = hinfo.physical_cpu_max; -record_trace1: - kd = kd_bufptr; - kd->debugid = debugid; - kd->arg1 = arg1; - kd->arg2 = arg2; - kd->arg3 = arg3; - kd->arg4 = arg4; - kd->arg5 = arg5; - - now = mach_absolute_time() & KDBG_TIMESTAMP_MASK; - - /* Watch for out of order timestamps */ - - if (now < 
kd_prev_timebase) - { - now = ++kd_prev_timebase & KDBG_TIMESTAMP_MASK; - } - else - { - /* Then just store the previous timestamp */ - kd_prev_timebase = now; - } - kd->timestamp = now | (((uint64_t)cpu_number()) << KDBG_CPU_SHIFT); - - kd_bufptr++; - - if (kd_bufptr >= kd_buflast) - kd_bufptr = kd_buffer; - if (kd_bufptr == kd_readlast) { - if (kdebug_flags & KDBG_NOWRAP) - kdebug_slowcheck |= SLOW_NOLOG; - kdebug_flags |= KDBG_WRAPPED; - } - lck_spin_unlock(kd_trace_lock); - ml_set_interrupts_enabled(s); -} - - -static void -kdbg_lock_init() -{ + if (kmem_alloc(kernel_map, (unsigned int *)&kdbip, sizeof(struct kd_bufinfo) * kd_cpus) != KERN_SUCCESS) + return; - if (kdebug_flags & KDBG_LOCKINIT) - return; - /* + /* * allocate lock group attribute and group */ - kd_trace_lock_grp_attr = lck_grp_attr_alloc_init(); - //lck_grp_attr_setstat(kd_trace_lock_grp_attr); - kd_trace_lock_grp = lck_grp_alloc_init("kdebug", kd_trace_lock_grp_attr); - - kd_trace_mtx_grp_attr = lck_grp_attr_alloc_init(); - //lck_grp_attr_setstat(kd_trace_mtx_grp_attr); - kd_trace_mtx_grp = lck_grp_alloc_init("kdebug", kd_trace_mtx_grp_attr); + kd_trace_mtx_sysctl_grp_attr = lck_grp_attr_alloc_init(); + kd_trace_mtx_sysctl_grp = lck_grp_alloc_init("kdebug", kd_trace_mtx_sysctl_grp_attr); /* * allocate the lock attribute */ - kd_trace_lock_attr = lck_attr_alloc_init(); - //lck_attr_setdebug(kd_trace_lock_attr); - - kd_trace_mtx_attr = lck_attr_alloc_init(); - //lck_attr_setdebug(kd_trace_mtx_attr); + kd_trace_mtx_sysctl_attr = lck_attr_alloc_init(); /* * allocate and initialize spin lock and mutex */ - kd_trace_lock = lck_spin_alloc_init(kd_trace_lock_grp, kd_trace_lock_attr); - kd_trace_mtx = lck_mtx_alloc_init(kd_trace_mtx_grp, kd_trace_mtx_attr); + kd_trace_mtx_sysctl = lck_mtx_alloc_init(kd_trace_mtx_sysctl_grp, kd_trace_mtx_sysctl_attr); kdebug_flags |= KDBG_LOCKINIT; } int -kdbg_bootstrap() +kdbg_bootstrap(void) { + kdebug_flags &= ~KDBG_WRAPPED; - kd_bufsize = nkdbufs * sizeof(kd_buf); - - 
if (kmem_alloc(kernel_map, &kd_buftomem, - (vm_size_t)kd_bufsize) == KERN_SUCCESS) - kd_buffer = (kd_buf *) kd_buftomem; - else - kd_buffer= (kd_buf *) 0; - kdebug_flags &= ~KDBG_WRAPPED; - - if (kd_buffer) { - kdebug_flags |= (KDBG_INIT | KDBG_BUFINIT); - kd_bufptr = kd_buffer; - kd_buflast = &kd_bufptr[nkdbufs]; - kd_readlast = kd_bufptr; - kd_prev_timebase = 0LL; - return(0); - } else { - kd_bufsize=0; - kdebug_flags &= ~(KDBG_INIT | KDBG_BUFINIT); - return(EINVAL); - } - + return (create_buffers()); } -kdbg_reinit() +int +kdbg_reinit(void) { - int s; int ret=0; /* @@ -460,17 +444,18 @@ kdbg_reinit() * First make sure we're not in * the middle of cutting a trace */ - s = ml_set_interrupts_enabled(FALSE); - lck_spin_lock(kd_trace_lock); kdebug_enable &= ~KDEBUG_ENABLE_TRACE; kdebug_slowcheck |= SLOW_NOLOG; - lck_spin_unlock(kd_trace_lock); - ml_set_interrupts_enabled(s); + /* + * make sure the SLOW_NOLOG is seen + * by everyone that might be trying + * to cut a trace.. + */ + IOSleep(100); - if ((kdebug_flags & KDBG_INIT) && (kdebug_flags & KDBG_BUFINIT) && kd_bufsize && kd_buffer) - kmem_free(kernel_map, (vm_offset_t)kd_buffer, kd_bufsize); + delete_buffers(); if ((kdebug_flags & KDBG_MAPINIT) && kd_mapsize && kd_mapptr) { @@ -481,12 +466,13 @@ kdbg_reinit() kd_mapcount = 0; } - ret= kdbg_bootstrap(); + ret = kdbg_bootstrap(); return(ret); } -void kdbg_trace_data(struct proc *proc, long *arg_pid) +void +kdbg_trace_data(struct proc *proc, long *arg_pid) { if (!proc) *arg_pid = 0; @@ -497,7 +483,8 @@ void kdbg_trace_data(struct proc *proc, long *arg_pid) } -void kdbg_trace_string(struct proc *proc, long *arg1, long *arg2, long *arg3, long *arg4) +void +kdbg_trace_string(struct proc *proc, long *arg1, long *arg2, long *arg3, long *arg4) { int i; char *dbg_nameptr; @@ -521,7 +508,7 @@ void kdbg_trace_string(struct proc *proc, long *arg1, long *arg2, long *arg3, lo dbg_parms[2]=0L; dbg_parms[3]=0L; - if(dbg_namelen > sizeof(dbg_parms)) + if(dbg_namelen > 
(int)sizeof(dbg_parms)) dbg_namelen = sizeof(dbg_parms); for(i=0;dbg_namelen > 0; i++) @@ -538,9 +525,10 @@ void kdbg_trace_string(struct proc *proc, long *arg1, long *arg2, long *arg3, lo } static void -kdbg_resolve_map(thread_t th_act, krt_t *t) +kdbg_resolve_map(thread_t th_act, void *opaque) { kd_threadmap *mapptr; + krt_t *t = (krt_t *)opaque; if(t->count < t->maxcount) { @@ -563,7 +551,8 @@ kdbg_resolve_map(thread_t th_act, krt_t *t) } } -void kdbg_mapinit() +void +kdbg_mapinit(void) { struct proc *p; struct krt akrt; @@ -631,7 +620,7 @@ void kdbg_mapinit() task_reference(p->task); tts_mapptr[i].task = p->task; tts_mapptr[i].pid = p->p_pid; - (void)strncpy(&tts_mapptr[i].task_comm, p->p_comm, sizeof(tts_mapptr[i].task_comm) - 1); + (void)strncpy(tts_mapptr[i].task_comm, p->p_comm, sizeof(tts_mapptr[i].task_comm) - 1); i++; } } @@ -660,34 +649,31 @@ void kdbg_mapinit() static void kdbg_clear(void) { - int s; - /* * Clean up the trace buffer * First make sure we're not in * the middle of cutting a trace */ - s = ml_set_interrupts_enabled(FALSE); - lck_spin_lock(kd_trace_lock); kdebug_enable &= ~KDEBUG_ENABLE_TRACE; kdebug_slowcheck = SLOW_NOLOG; + /* + * make sure the SLOW_NOLOG is seen + * by everyone that might be trying + * to cut a trace.. 
+ */ + IOSleep(100); + if (kdebug_enable & KDEBUG_ENABLE_ENTROPY) kdebug_slowcheck |= SLOW_ENTROPY; - lck_spin_unlock(kd_trace_lock); - ml_set_interrupts_enabled(s); - global_state_pid = -1; - kdebug_flags &= ~KDBG_BUFINIT; kdebug_flags &= (unsigned int)~KDBG_CKTYPES; kdebug_flags &= ~(KDBG_NOWRAP | KDBG_RANGECHECK | KDBG_VALCHECK); kdebug_flags &= ~(KDBG_PIDCHECK | KDBG_PIDEXCLUDE); - kmem_free(kernel_map, (vm_offset_t)kd_buffer, kd_bufsize); - kd_buffer = (kd_buf *)0; - kd_bufsize = 0; - kd_prev_timebase = 0LL; + + delete_buffers(); /* Clean up the thread map buffer */ kdebug_flags &= ~KDBG_MAPINIT; @@ -697,6 +683,7 @@ kdbg_clear(void) kd_mapcount = 0; } +int kdbg_setpid(kd_regtype *kdr) { pid_t pid; @@ -734,6 +721,7 @@ kdbg_setpid(kd_regtype *kdr) } /* This is for pid exclusion in the trace buffer */ +int kdbg_setpidex(kd_regtype *kdr) { pid_t pid; @@ -771,6 +759,7 @@ kdbg_setpidex(kd_regtype *kdr) } /* This is for setting a maximum decrementer value */ +int kdbg_setrtcdec(kd_regtype *kdr) { int ret=0; @@ -782,9 +771,6 @@ kdbg_setrtcdec(kd_regtype *kdr) ret = EINVAL; #ifdef ppc else { - - extern uint32_t maxDec; - maxDec = decval ? 
decval : 0x7FFFFFFF; /* Set or reset the max decrementer */ } #else @@ -795,9 +781,10 @@ kdbg_setrtcdec(kd_regtype *kdr) return(ret); } +int kdbg_setreg(kd_regtype * kdr) { - int i,j, ret=0; + int ret=0; unsigned int val_1, val_2, val; switch (kdr->type) { @@ -858,11 +845,13 @@ kdbg_setreg(kd_regtype * kdr) return(ret); } -kdbg_getreg(kd_regtype * kdr) +int +kdbg_getreg(__unused kd_regtype * kdr) { +#if 0 int i,j, ret=0; unsigned int val_1, val_2, val; -#if 0 + switch (kdr->type) { case KDBG_CLASSTYPE : val_1 = (kdr->value1 & 0xff); @@ -906,7 +895,7 @@ kdbg_readmap(user_addr_t buffer, size_t *number) { int avail = *number; int ret = 0; - int count = 0; + unsigned int count = 0; count = avail/sizeof (kd_threadmap); @@ -945,7 +934,6 @@ kdbg_getentropy (user_addr_t buffer, size_t *number, int ms_timeout) { int avail = *number; int ret = 0; - int count = 0; /* The number of timestamp entries that will fill buffer */ if (kd_entropy_buffer) return(EBUSY); @@ -1014,7 +1002,8 @@ kdbg_getentropy (user_addr_t buffer, size_t *number, int ms_timeout) * address of the enabled kdebug_chudhook function */ -void kdbg_control_chud(int val, void *fn) +void +kdbg_control_chud(int val, void *fn) { if (val) { /* enable chudhook */ @@ -1029,11 +1018,12 @@ void kdbg_control_chud(int val, void *fn) } -kdbg_control(int *name, u_int namelen, user_addr_t where, size_t *sizep) +int +kdbg_control(int *name, __unused u_int namelen, user_addr_t where, size_t *sizep) { int ret=0; - int size=*sizep; - int max_entries; + size_t size=*sizep; + unsigned int max_entries; unsigned int value = name[1]; kd_regtype kd_Reg; kbufinfo_t kd_bufinfo; @@ -1042,7 +1032,11 @@ kdbg_control(int *name, u_int namelen, user_addr_t where, size_t *sizep) kdbg_lock_init(); - lck_mtx_lock(kd_trace_mtx); + + if ( !(kdebug_flags & KDBG_LOCKINIT)) + return(ENOMEM); + + lck_mtx_lock(kd_trace_mtx_sysctl); if (name[0] == KERN_KDGETBUF) { /* @@ -1054,7 +1048,7 @@ kdbg_control(int *name, u_int namelen, user_addr_t where, size_t 
*sizep) * There is not enough room to return even * the first element of the info structure. */ - lck_mtx_unlock(kd_trace_mtx); + lck_mtx_unlock(kd_trace_mtx_sysctl); return(EINVAL); } @@ -1073,7 +1067,7 @@ kdbg_control(int *name, u_int namelen, user_addr_t where, size_t *sizep) * Provide all the info we have */ if (copyout (&kd_bufinfo, where, sizeof(kd_bufinfo))) { - lck_mtx_unlock(kd_trace_mtx); + lck_mtx_unlock(kd_trace_mtx_sysctl); return(EINVAL); } @@ -1084,12 +1078,12 @@ kdbg_control(int *name, u_int namelen, user_addr_t where, size_t *sizep) * as much info as there is room for. */ if (copyout (&kd_bufinfo, where, size)) { - lck_mtx_unlock(kd_trace_mtx); + lck_mtx_unlock(kd_trace_mtx_sysctl); return(EINVAL); } } - lck_mtx_unlock(kd_trace_mtx); + lck_mtx_unlock(kd_trace_mtx_sysctl); return(0); } else if (name[0] == KERN_KDGETENTROPY) { @@ -1097,15 +1091,15 @@ kdbg_control(int *name, u_int namelen, user_addr_t where, size_t *sizep) ret = EBUSY; else ret = kdbg_getentropy(where, sizep, value); - lck_mtx_unlock(kd_trace_mtx); + lck_mtx_unlock(kd_trace_mtx_sysctl); return (ret); } - if (curproc = current_proc()) + if ((curproc = current_proc()) != NULL) curpid = curproc->p_pid; else { - lck_mtx_unlock(kd_trace_mtx); + lck_mtx_unlock(kd_trace_mtx_sysctl); return (ESRCH); } @@ -1121,7 +1115,7 @@ kdbg_control(int *name, u_int namelen, user_addr_t where, size_t *sizep) /* * The global pid exists, deny this request */ - lck_mtx_unlock(kd_trace_mtx); + lck_mtx_unlock(kd_trace_mtx_sysctl); return(EBUSY); } @@ -1145,6 +1139,8 @@ kdbg_control(int *name, u_int namelen, user_addr_t where, size_t *sizep) ret=EINVAL; break; } + kdbg_mapinit(); + kdebug_enable |= KDEBUG_ENABLE_TRACE; kdebug_slowcheck &= ~SLOW_NOLOG; } @@ -1153,7 +1149,6 @@ kdbg_control(int *name, u_int namelen, user_addr_t where, size_t *sizep) kdebug_enable &= ~KDEBUG_ENABLE_TRACE; kdebug_slowcheck |= SLOW_NOLOG; } - kdbg_mapinit(); break; case KERN_KDSETBUF: /* We allow a maximum buffer size of 25% of either 
ram or max mapped address, whichever is smaller */ @@ -1234,109 +1229,287 @@ kdbg_control(int *name, u_int namelen, user_addr_t where, size_t *sizep) default: ret= EINVAL; } - lck_mtx_unlock(kd_trace_mtx); + lck_mtx_unlock(kd_trace_mtx_sysctl); return(ret); } + +/* + * This code can run concurrently with kernel_debug_internal() + * without the need of any locks, because all reads of kd_bufptr[i], + * which get modified by kernel_debug_internal(), are safe. + */ +int kdbg_read(user_addr_t buffer, size_t *number) { -int avail=*number; -int count=0; -int copycount=0; -int totalcount=0; -int s; -unsigned int my_kdebug_flags; -kd_buf * my_kd_bufptr; + unsigned int count; + unsigned int cpu; + int mincpu; + uint64_t mintime, t, last_wrap_time; + int last_wrap_cpu; + int error = 0; + kd_buf *tempbuf; + uint32_t tempbuf_count; + uint32_t tempbuf_number; + unsigned int old_kdebug_flags, new_kdebug_flags; + unsigned int old_kdebug_slowcheck, new_kdebug_slowcheck; + count = *number/sizeof(kd_buf); + *number = 0; + + if (count == 0 || !(kdebug_flags & KDBG_BUFINIT) || kdcopybuf == 0) + return EINVAL; - s = ml_set_interrupts_enabled(FALSE); - lck_spin_lock(kd_trace_lock); + /* + * because we hold kd_trace_mtx_sysctl, no other control threads can + * be playing with kdebug_flags... the code that cuts new events could + * be running, but it only reads kdebug_flags, it doesn't write it.. 
+ * use an OSCompareAndSwap to make sure the other processors see the + * change of state immediately, not to protect against 2 threads racing to update it + */ + old_kdebug_slowcheck = kdebug_slowcheck; + do { + old_kdebug_flags = kdebug_flags; + new_kdebug_flags = old_kdebug_flags & ~KDBG_WRAPPED; + new_kdebug_flags |= KDBG_NOWRAP; + } while ( !OSCompareAndSwap((UInt32)old_kdebug_flags, (UInt32)new_kdebug_flags, (UInt32 *)&kdebug_flags)); + + last_wrap_time = 0; + last_wrap_cpu = -1; + + for (cpu = 0; cpu < kd_cpus; cpu++) { + kd_buf *cur_bufptr; + + if ((cur_bufptr = kdbip[cpu].kd_bufptr) >= kdbip[cpu].kd_buflast) + cur_bufptr = kdbip[cpu].kd_buffer; - my_kdebug_flags = kdebug_flags; - my_kd_bufptr = kd_bufptr; + if (kdbip[cpu].kd_wrapped) { + kdbip[cpu].kd_wrapped = 0; + kdbip[cpu].kd_readlast = cur_bufptr; + kdbip[cpu].kd_stop = cur_bufptr; - lck_spin_unlock(kd_trace_lock); - ml_set_interrupts_enabled(s); + if (kd_cpus > 1 && ((cur_bufptr->timestamp & KDBG_TIMESTAMP_MASK) > last_wrap_time)) { + last_wrap_time = cur_bufptr->timestamp & KDBG_TIMESTAMP_MASK; + last_wrap_cpu = cpu; + } + } else { + if (kdbip[cpu].kd_readlast == cur_bufptr) + kdbip[cpu].kd_stop = 0; + else + kdbip[cpu].kd_stop = cur_bufptr; + } + } + if (count > nkdbufs) + count = nkdbufs; - count = avail/sizeof(kd_buf); + if ((tempbuf_count = count) > KDCOPYBUF_COUNT) + tempbuf_count = KDCOPYBUF_COUNT; - if (count) { - if ((my_kdebug_flags & KDBG_BUFINIT) && kd_bufsize && kd_buffer) { - if (count > nkdbufs) - count = nkdbufs; - - if (!(my_kdebug_flags & KDBG_WRAPPED)) { - if (my_kd_bufptr == kd_readlast) { - *number = 0; - return(0); - } - if (my_kd_bufptr > kd_readlast) { - copycount = my_kd_bufptr - kd_readlast; - if (copycount > count) - copycount = count; - - if (copyout(kd_readlast, buffer, copycount * sizeof(kd_buf))) { - *number = 0; - return(EINVAL); - } - kd_readlast += copycount; - *number = copycount; - return(0); + while (count) { + tempbuf = kdcopybuf; + tempbuf_number = 0; + + while 
(tempbuf_count) { + mintime = 0xffffffffffffffffULL; /* all actual timestamps are below */ + mincpu = -1; + + for (cpu = 0; cpu < kd_cpus; cpu++) { + if (kdbip[cpu].kd_stop == 0) /* empty buffer */ + continue; + t = kdbip[cpu].kd_readlast[0].timestamp & KDBG_TIMESTAMP_MASK; + + if (t < mintime) { + mintime = t; + mincpu = cpu; } } - if ( (my_kdebug_flags & KDBG_WRAPPED) ) { - /* Note that by setting kd_readlast equal to my_kd_bufptr, - * we now treat the kd_buffer read the same as if we weren't - * wrapped and my_kd_bufptr was less than kd_readlast. + if (mincpu < 0) + /* + * all buffers ran empty early */ - kd_readlast = my_kd_bufptr; - kdebug_flags &= ~KDBG_WRAPPED; + break; + + if (last_wrap_cpu == mincpu) { + tempbuf->debugid = MISCDBG_CODE(DBG_BUFFER, 0) | DBG_FUNC_NONE; + tempbuf->arg1 = 0; + tempbuf->arg2 = 0; + tempbuf->arg3 = 0; + tempbuf->arg4 = 0; + tempbuf->arg5 = (int)current_thread(); + + tempbuf->timestamp = last_wrap_time | (((uint64_t)last_wrap_cpu) << KDBG_CPU_SHIFT); + + tempbuf++; + + last_wrap_cpu = -1; + + } else { + *(tempbuf++) = kdbip[mincpu].kd_readlast[0]; + + kdbip[mincpu].kd_readlast++; + + if (kdbip[mincpu].kd_readlast == kdbip[mincpu].kd_buflast) + kdbip[mincpu].kd_readlast = kdbip[mincpu].kd_buffer; + if (kdbip[mincpu].kd_readlast == kdbip[mincpu].kd_stop) + kdbip[mincpu].kd_stop = 0; } - /* - * first copyout from readlast to end of kd_buffer - */ - copycount = kd_buflast - kd_readlast; - if (copycount > count) - copycount = count; - if (copyout(kd_readlast, buffer, copycount * sizeof(kd_buf))) { + tempbuf_count--; + tempbuf_number++; + } + if (tempbuf_number) { + if ((error = copyout(kdcopybuf, buffer, tempbuf_number * sizeof(kd_buf)))) { *number = 0; - return(EINVAL); - } - buffer += (copycount * sizeof(kd_buf)); - count -= copycount; - totalcount = copycount; - kd_readlast += copycount; - - if (kd_readlast == kd_buflast) - kd_readlast = kd_buffer; - if (count == 0) { - *number = totalcount; - return(0); - } - /* second copyout 
from top of kd_buffer to bufptr */ - copycount = my_kd_bufptr - kd_readlast; - if (copycount > count) - copycount = count; - if (copycount == 0) { - *number = totalcount; - return(0); + error = EINVAL; + break; } - if (copyout(kd_readlast, buffer, copycount * sizeof(kd_buf))) - return(EINVAL); - - kd_readlast += copycount; - totalcount += copycount; - *number = totalcount; - return(0); + count -= tempbuf_number; + *number += tempbuf_number; + buffer += (tempbuf_number * sizeof(kd_buf)); + } + if (tempbuf_count) + /* + * all trace buffers are empty + */ + break; - } /* end if KDBG_BUFINIT */ - } /* end if count */ - return (EINVAL); + if ((tempbuf_count = count) > KDCOPYBUF_COUNT) + tempbuf_count = KDCOPYBUF_COUNT; + } + if ( !(old_kdebug_flags & KDBG_NOWRAP)) { + do { + old_kdebug_flags = kdebug_flags; + new_kdebug_flags = old_kdebug_flags & ~KDBG_NOWRAP; + } while ( !OSCompareAndSwap((UInt32)old_kdebug_flags, (UInt32)new_kdebug_flags, (UInt32 *)&kdebug_flags)); + + if ( !(old_kdebug_slowcheck & SLOW_NOLOG)) { + do { + old_kdebug_slowcheck = kdebug_slowcheck; + new_kdebug_slowcheck = old_kdebug_slowcheck & ~SLOW_NOLOG; + } while ( !OSCompareAndSwap((UInt32)old_kdebug_slowcheck, (UInt32)new_kdebug_slowcheck, (UInt32 *)&kdebug_slowcheck)); + } + } + return (error); } + unsigned char *getProcName(struct proc *proc); unsigned char *getProcName(struct proc *proc) { return (unsigned char *) &proc->p_comm; /* Return pointer to the proc name */ } + +#define STACKSHOT_SUBSYS_LOCK() lck_mtx_lock(&stackshot_subsys_mutex) +#define STACKSHOT_SUBSYS_UNLOCK() lck_mtx_unlock(&stackshot_subsys_mutex) +#ifdef __i386__ +#define TRAP_DEBUGGER __asm__ volatile("int3"); +#endif +#ifdef __ppc__ +#define TRAP_DEBUGGER __asm__ volatile("tw 4,r3,r3"); +#endif + +#define SANE_TRACEBUF_SIZE 2*1024*1024 + +/* Initialize the mutex governing access to the stack snapshot subsystem */ +__private_extern__ void +stackshot_lock_init( void ) +{ + stackshot_subsys_lck_grp_attr = 
lck_grp_attr_alloc_init(); + + stackshot_subsys_lck_grp = lck_grp_alloc_init("stackshot_subsys_lock", stackshot_subsys_lck_grp_attr); + + stackshot_subsys_lck_attr = lck_attr_alloc_init(); + + lck_mtx_init(&stackshot_subsys_mutex, stackshot_subsys_lck_grp, stackshot_subsys_lck_attr); +} + +/* + * stack_snapshot: Obtains a coherent set of stack traces for all threads + * on the system, tracing both kernel and user stacks + * where available. Uses machine specific trace routines + * for ppc, ppc64 and x86. + * Inputs: uap->pid - process id of process to be traced, or -1 + * for the entire system + * uap->tracebuf - address of the user space destination + * buffer + * uap->tracebuf_size - size of the user space trace buffer + * uap->options - various options, including the maximum + * number of frames to trace. + * Outputs: EPERM if the caller is not privileged + * EINVAL if the supplied trace buffer isn't sanely sized + * ENOMEM if we don't have enough memory to satisfy the + * request + * ENOENT if the target pid isn't found + * ENOSPC if the supplied buffer is insufficient + * *retval contains the number of bytes traced, if successful + * and -1 otherwise. If the request failed due to + * tracebuffer exhaustion, we copyout as much as possible. 
+ */ +int +stack_snapshot(struct proc *p, register struct stack_snapshot_args *uap, register_t *retval) { + int error = 0; + + if ((error = suser(kauth_cred_get(), &p->p_acflag))) + return(error); + + return stack_snapshot2(uap->pid, uap->tracebuf, uap->tracebuf_size, + uap->options, retval); +} + +int +stack_snapshot2(pid_t pid, user_addr_t tracebuf, uint32_t tracebuf_size, uint32_t options, register_t *retval) +{ + int error = 0; + unsigned bytesTraced = 0; + + *retval = -1; +/* Serialize tracing */ + STACKSHOT_SUBSYS_LOCK(); + + if ((tracebuf_size <= 0) || (tracebuf_size > SANE_TRACEBUF_SIZE)) { + error = EINVAL; + goto error_exit; + } + + MALLOC(stackshot_snapbuf, void *, tracebuf_size, M_TEMP, M_WAITOK); + + if (stackshot_snapbuf == NULL) { + error = ENOMEM; + goto error_exit; + } +/* Preload trace parameters*/ + kdp_snapshot_preflight(pid, stackshot_snapbuf, tracebuf_size, options); + +/* Trap to the debugger to obtain a coherent stack snapshot; this populates + * the trace buffer + */ + TRAP_DEBUGGER; + + bytesTraced = kdp_stack_snapshot_bytes_traced(); + + if (bytesTraced > 0) { + if ((error = copyout(stackshot_snapbuf, tracebuf, + ((bytesTraced < tracebuf_size) ? 
+ bytesTraced : tracebuf_size)))) + goto error_exit; + *retval = bytesTraced; + } + else { + error = ENOENT; + goto error_exit; + } + + error = kdp_stack_snapshot_geterror(); + if (error == -1) { + error = ENOSPC; + *retval = -1; + goto error_exit; + } + +error_exit: + if (stackshot_snapbuf != NULL) + FREE(stackshot_snapbuf, M_TEMP); + stackshot_snapbuf = NULL; + STACKSHOT_SUBSYS_UNLOCK(); + return error; +} diff --git a/bsd/kern/kern_aio.c b/bsd/kern/kern_aio.c index e913df014..58f79b324 100644 --- a/bsd/kern/kern_aio.c +++ b/bsd/kern/kern_aio.c @@ -2202,10 +2202,8 @@ aio_init( void ) int i; aio_lock_grp_attr = lck_grp_attr_alloc_init(); - lck_grp_attr_setstat(aio_lock_grp_attr); aio_lock_grp = lck_grp_alloc_init("aio", aio_lock_grp_attr); aio_lock_attr = lck_attr_alloc_init(); - //lck_attr_setdebug(aio_lock_attr); aio_lock = lck_mtx_alloc_init(aio_lock_grp, aio_lock_attr); diff --git a/bsd/kern/kern_authorization.c b/bsd/kern/kern_authorization.c index b5dbe6706..d7bf6c916 100644 --- a/bsd/kern/kern_authorization.c +++ b/bsd/kern/kern_authorization.c @@ -858,14 +858,32 @@ kauth_acl_inherit(vnode_t dvp, kauth_acl_t initial, kauth_acl_t *product, int is /* * Optimistically copy in a kauth_filesec structure + * * Parameters: xsecurity user space kauth_filesec_t - * xsecdstpp pointer to kauth_filesec_t + * xsecdstpp pointer to kauth_filesec_t to be + * modified to contain the contain a + * pointer to an allocated copy of the + * user space argument + * + * Returns: 0 Success + * ENOMEM Insufficient memory for the copy. + * EINVAL The user space data was invalid, or + * there were too many ACE entries. + * EFAULT The user space address was invalid; + * this may mean 'fsec_entrycount' in + * the user copy is corrupt/incorrect. + * + * Implicit returns: xsecdestpp, modified (only if successful!) 
+ * + * Notes: The returned kauth_filesec_t is in host byte order + * + * The caller is responsible for freeing the returned + * kauth_filesec_t in the success case using the function + * kauth_filesec_free() * - * Returns: 0 on success, EINVAL or EFAULT depending on failure mode. - * Modifies: xsecdestpp, which contains a pointer to an allocated - * and copied-in kauth_filesec_t + * Our largest initial guess is 32; this needs to move to + * a manifest constant in <sys/kauth.h>. */ - int kauth_copyinfilesec(user_addr_t xsecurity, kauth_filesec_t *xsecdestpp) { @@ -913,6 +931,7 @@ kauth_copyinfilesec(user_addr_t xsecurity, kauth_filesec_t *xsecdestpp) if ((fsec->fsec_entrycount != KAUTH_FILESEC_NOACL) && (fsec->fsec_entrycount > count)) { if (fsec->fsec_entrycount > KAUTH_ACL_MAX_ENTRIES) { + /* XXX This should be E2BIG */ error = EINVAL; goto out; } @@ -932,7 +951,23 @@ kauth_copyinfilesec(user_addr_t xsecurity, kauth_filesec_t *xsecdestpp) } /* - * Allocate a filesec structure. + * Allocate a block of memory containing a filesec structure, immediately + * followed by 'count' kauth_ace structures. + * + * Parameters: count Number of kauth_ace structures needed + * + * Returns: !NULL A pointer to the allocated block + * NULL Invalid 'count' or insufficient memory + * + * Notes: Returned memory area assumes that the structures are packed + * densely, so this function may only be used by code that also + * assumes no padding following structures. + * + * The returned structure must be freed by the caller using the + * function kauth_filesec_free(), in case we decide to use an + * allocation mechanism that is aware of the object size at some + * point, since the object size is only available by introspecting + * the object itself.
*/ kauth_filesec_t kauth_filesec_alloc(int count) @@ -954,6 +989,18 @@ kauth_filesec_alloc(int count) return(fsp); } +/* + * Free a kauth_filesec_t that was previously allocated, either by a direct + * call to kauth_filesec_alloc() or by calling a function that calls it. + * + * Parameters: fsp kauth_filesec_t to free + * + * Returns: (void) + * + * Notes: The kauth_filesec_t to be freed is assumed to be in host + * byte order so that this function can introspect it in the + * future to determine its size, if necessary. + */ void kauth_filesec_free(kauth_filesec_t fsp) { @@ -966,6 +1013,73 @@ kauth_filesec_free(kauth_filesec_t fsp) FREE(fsp, M_KAUTH); } +/* + * Set the endianness of a filesec and an ACL; if 'acl' is NULL, use the + * ACL interior to 'fsec' instead. If the endianness doesn't change, then + * this function will have no effect. + * + * Parameters: kendian The endianness to set; this is either + * KAUTH_ENDIAN_HOST or KAUTH_ENDIAN_DISK. + * fsec The filesec to convert. + * acl The ACL to convert (optional) + * + * Returns: (void) + * + * Notes: We use ntohl() because it has a transitive property on Intel + * machines and no effect on PPC machines. This guarantees us + * that the swapping only occurs if the endianness is wrong. + */ +void +kauth_filesec_acl_setendian(int kendian, kauth_filesec_t fsec, kauth_acl_t acl) +{ + uint32_t compare_magic = KAUTH_FILESEC_MAGIC; + uint32_t invert_magic = ntohl(KAUTH_FILESEC_MAGIC); + uint32_t compare_acl_entrycount; + uint32_t i; + + if (compare_magic == invert_magic) + return; + + /* If no ACL, use ACL interior to 'fsec' instead */ + if (acl == NULL) + acl = &fsec->fsec_acl; + + compare_acl_entrycount = acl->acl_entrycount; + + /* + * Only convert what needs to be converted, and only if the arguments + * are valid. The following switch and tests effectively reject + * conversions on invalid magic numbers as a desirable side effect.
+ */ + switch(kendian) { + case KAUTH_ENDIAN_HOST: /* not in host, convert to host */ + if (fsec->fsec_magic != invert_magic) + return; + /* acl_entrycount is byteswapped */ + compare_acl_entrycount = ntohl(acl->acl_entrycount); + break; + case KAUTH_ENDIAN_DISK: /* not in disk, convert to disk */ + if (fsec->fsec_magic != compare_magic) + return; + break; + default: /* bad argument */ + return; + } + + /* We are go for conversion */ + fsec->fsec_magic = ntohl(fsec->fsec_magic); + acl->acl_entrycount = ntohl(acl->acl_entrycount); + if (compare_acl_entrycount != KAUTH_FILESEC_NOACL) { + acl->acl_flags = ntohl(acl->acl_flags); + + /* swap ACE rights and flags */ + for (i = 0; i < compare_acl_entrycount; i++) { + acl->acl_ace[i].ace_flags = ntohl(acl->acl_ace[i].ace_flags); + acl->acl_ace[i].ace_rights = ntohl(acl->acl_ace[i].ace_rights); + } + } + } + /* * Allocate an ACL buffer. diff --git a/bsd/kern/kern_control.c b/bsd/kern/kern_control.c index 95c9cbe2e..daac86d46 100644 --- a/bsd/kern/kern_control.c +++ b/bsd/kern/kern_control.c @@ -68,45 +68,6 @@ static lck_attr_t *ctl_lck_attr = 0; static lck_grp_t *ctl_lck_grp = 0; static lck_mtx_t *ctl_mtx; -/* - * internal structure maintained for each register controller - */ - -struct ctl_cb; - -struct kctl -{ - TAILQ_ENTRY(kctl) next; /* controller chain */ - - /* controller information provided when registering */ - char name[MAX_KCTL_NAME]; /* unique nke identifier, provided by DTS */ - u_int32_t id; - u_int32_t reg_unit; - - /* misc communication information */ - u_int32_t flags; /* support flags */ - u_int32_t recvbufsize; /* request more than the default buffer size */ - u_int32_t sendbufsize; /* request more than the default buffer size */ - - /* Dispatch functions */ - ctl_connect_func connect; /* Make contact */ - ctl_disconnect_func disconnect; /* Break contact */ - ctl_send_func send; /* Send data to nke */ - ctl_setopt_func setopt; /* set kctl configuration */ - ctl_getopt_func getopt; /* get kctl 
configuration */ - - TAILQ_HEAD(, ctl_cb) kcb_head; - u_int32_t lastunit; -}; - -struct ctl_cb { - TAILQ_ENTRY(ctl_cb) next; /* controller chain */ - lck_mtx_t *mtx; - struct socket *so; /* controlling socket */ - struct kctl *kctl; /* back pointer to controller */ - u_int32_t unit; - void *userdata; -}; /* all the controllers are chained */ TAILQ_HEAD(, kctl) ctl_head; @@ -179,7 +140,6 @@ kern_control_init(void) error = ENOMEM; goto done; } - lck_grp_attr_setdefault(ctl_lck_grp_attr); ctl_lck_grp = lck_grp_alloc_init("Kernel Control Protocol", ctl_lck_grp_attr); if (ctl_lck_grp == 0) { @@ -194,7 +154,6 @@ kern_control_init(void) error = ENOMEM; goto done; } - lck_attr_setdefault(ctl_lck_attr); ctl_mtx = lck_mtx_alloc_init(ctl_lck_grp, ctl_lck_attr); if (ctl_mtx == 0) { @@ -920,12 +879,9 @@ static int ctl_lock(struct socket *so, int refcount, int lr) { int lr_saved; -#ifdef __ppc__ - if (lr == 0) { - __asm__ volatile("mflr %0" : "=r" (lr_saved)); - } + if (lr == 0) + lr_saved = (unsigned int) __builtin_return_address(0); else lr_saved = lr; -#endif if (so->so_pcb) { lck_mtx_lock(((struct ctl_cb *)so->so_pcb)->mtx); @@ -940,7 +896,9 @@ ctl_lock(struct socket *so, int refcount, int lr) if (refcount) so->so_usecount++; - so->reserved3 = (void *)lr_saved; + + so->lock_lr[so->next_lock_lr] = (void *)lr_saved; + so->next_lock_lr = (so->next_lock_lr+1) % SO_LCKDBG_MAX; return (0); } @@ -950,12 +908,9 @@ ctl_unlock(struct socket *so, int refcount, int lr) int lr_saved; lck_mtx_t * mutex_held; -#ifdef __ppc__ - if (lr == 0) { - __asm__ volatile("mflr %0" : "=r" (lr_saved)); - } + if (lr == 0) + lr_saved = (unsigned int) __builtin_return_address(0); else lr_saved = lr; -#endif #ifdef MORE_KCTLLOCK_DEBUG printf("ctl_unlock: so=%x sopcb=%x lock=%x ref=%x lr=%x\n", @@ -973,8 +928,9 @@ ctl_unlock(struct socket *so, int refcount, int lr) mutex_held = ((struct ctl_cb *)so->so_pcb)->mtx; } lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED); + so->unlock_lr[so->next_unlock_lr] = (void 
*)lr_saved; + so->next_unlock_lr = (so->next_unlock_lr+1) % SO_LCKDBG_MAX; lck_mtx_unlock(mutex_held); - so->reserved4 = (void *)lr_saved; if (so->so_usecount == 0) ctl_sofreelastref(so); diff --git a/bsd/kern/kern_core.c b/bsd/kern/kern_core.c index 105181c1a..2b4b3580b 100644 --- a/bsd/kern/kern_core.c +++ b/bsd/kern/kern_core.c @@ -73,17 +73,11 @@ mythread_state_flavor_t thread_flavor_array[]={ int mynum_flavors=4; #elif defined (__i386__) mythread_state_flavor_t thread_flavor_array [] = { - {i386_THREAD_STATE, i386_THREAD_STATE_COUNT}, - {i386_THREAD_FPSTATE, i386_THREAD_FPSTATE_COUNT}, - {i386_THREAD_EXCEPTSTATE, i386_THREAD_EXCEPTSTATE_COUNT}, - {i386_THREAD_CTHREADSTATE, i386_THREAD_CTHREADSTATE_COUNT}, - {i386_NEW_THREAD_STATE, i386_NEW_THREAD_STATE_COUNT}, - {i386_FLOAT_STATE, i386_FLOAT_STATE_COUNT}, - {i386_ISA_PORT_MAP_STATE, i386_ISA_PORT_MAP_STATE_COUNT}, - {i386_V86_ASSIST_STATE, i386_V86_ASSIST_STATE_COUNT}, - {THREAD_SYSCALL_STATE, i386_THREAD_SYSCALL_STATE_COUNT} + {x86_THREAD_STATE, x86_THREAD_STATE_COUNT}, + {x86_FLOAT_STATE, x86_FLOAT_STATE_COUNT}, + {x86_EXCEPTION_STATE, x86_EXCEPTION_STATE_COUNT}, }; -int mynum_flavors=9; +int mynum_flavors=3; #else #error architecture not supported diff --git a/bsd/kern/kern_descrip.c b/bsd/kern/kern_descrip.c index 1ec789877..7cff95e66 100644 --- a/bsd/kern/kern_descrip.c +++ b/bsd/kern/kern_descrip.c @@ -106,7 +106,6 @@ int fdgetf_noref(struct proc *p, int fd, struct fileproc **resultfp); void fg_drop(struct fileproc * fp); void fg_free(struct fileglob *fg); void fg_ref(struct fileproc * fp); -int fp_getfpshm(struct proc *p, int fd, struct fileproc **resultfp, struct pshmnode **resultpshm); static int closef_finish(struct fileproc *fp, struct fileglob *fg, struct proc *p); @@ -149,13 +148,11 @@ file_lock_init(void) /* allocate file lock group attribute and group */ file_lck_grp_attr= lck_grp_attr_alloc_init(); - lck_grp_attr_setstat(file_lck_grp_attr); file_lck_grp = lck_grp_alloc_init("file", 
file_lck_grp_attr); /* Allocate file lock attribute */ file_lck_attr = lck_attr_alloc_init(); - //lck_attr_setdebug(file_lck_attr); uipc_lock = lck_mtx_alloc_init(file_lck_grp, file_lck_attr); file_iterate_lcok = lck_mtx_alloc_init(file_lck_grp, file_lck_attr); @@ -1502,6 +1499,42 @@ fp_getfvp(p, fd, resultfp, resultvp) } + +int +fp_getfvpandvid(p, fd, resultfp, resultvp, vidp) + struct proc *p; + int fd; + struct fileproc **resultfp; + struct vnode **resultvp; + uint32_t * vidp; +{ + struct filedesc *fdp = p->p_fd; + struct fileproc *fp; + + proc_fdlock(p); + if (fd < 0 || fd >= fdp->fd_nfiles || + (fp = fdp->fd_ofiles[fd]) == NULL || + (fdp->fd_ofileflags[fd] & UF_RESERVED)) { + proc_fdunlock(p); + return (EBADF); + } + if (fp->f_type != DTYPE_VNODE) { + proc_fdunlock(p); + return(ENOTSUP); + } + fp->f_iocount++; + + if (resultfp) + *resultfp = fp; + if (resultvp) + *resultvp = (struct vnode *)fp->f_data; + if (vidp) + *vidp = (uint32_t)vnode_vid((struct vnode *)fp->f_data); + proc_fdunlock(p); + + return (0); +} + /* * Returns: EBADF The file descriptor is invalid * EOPNOTSUPP The file descriptor is not a socket @@ -1640,6 +1673,74 @@ fp_getfpsem(p, fd, resultfp, resultpsem) return (0); } + + +int +fp_getfpipe(p, fd, resultfp, resultpipe) + struct proc *p; + int fd; + struct fileproc **resultfp; + struct pipe **resultpipe; +{ + struct filedesc *fdp = p->p_fd; + struct fileproc *fp; + + proc_fdlock(p); + if (fd < 0 || fd >= fdp->fd_nfiles || + (fp = fdp->fd_ofiles[fd]) == NULL || + (fdp->fd_ofileflags[fd] & UF_RESERVED)) { + proc_fdunlock(p); + return (EBADF); + } + if (fp->f_type != DTYPE_PIPE) { + proc_fdunlock(p); + return(EBADF); + } + fp->f_iocount++; + + if (resultfp) + *resultfp = fp; + if (resultpipe) + *resultpipe = (struct pipe *)fp->f_data; + proc_fdunlock(p); + + return (0); +} + + +#define DTYPE_ATALK -1 +int +fp_getfatalk(p, fd, resultfp, resultatalk) + struct proc *p; + int fd; + struct fileproc **resultfp; + struct atalk **resultatalk; +{ + struct 
filedesc *fdp = p->p_fd; + struct fileproc *fp; + + proc_fdlock(p); + if (fd < 0 || fd >= fdp->fd_nfiles || + (fp = fdp->fd_ofiles[fd]) == NULL || + (fdp->fd_ofileflags[fd] & UF_RESERVED)) { + proc_fdunlock(p); + return (EBADF); + } + if (fp->f_type != (DTYPE_ATALK+1)) { + proc_fdunlock(p); + return(EBADF); + } + fp->f_iocount++; + + if (resultfp) + *resultfp = fp; + if (resultatalk) + *resultatalk = (struct atalk *)fp->f_data; + proc_fdunlock(p); + + return (0); +} + int fp_lookup(p, fd, resultfp, locked) struct proc *p; diff --git a/bsd/kern/kern_event.c b/bsd/kern/kern_event.c index d9392549b..76e0ec791 100644 --- a/bsd/kern/kern_event.c +++ b/bsd/kern/kern_event.c @@ -73,6 +73,7 @@ #include #include #include +#include #include #include @@ -411,6 +412,7 @@ filt_procattach(struct knote *kn) } kn->kn_flags |= EV_CLEAR; /* automatically set */ + kn->kn_hookid = 1; /* mark exit not seen */ /* * internal flag indicating registration done by kernel @@ -431,11 +433,12 @@ filt_procattach(struct knote *kn) /* * The knote may be attached to a different process, which may exit, - * leaving nothing for the knote to be attached to. So when the process - * exits, the knote is marked as DETACHED and also flagged as ONESHOT so - * it will be deleted when read out. However, as part of the knote deletion, - * this routine is called, so a check is needed to avoid actually performing - * a detach, because the original process does not exist any more. + * leaving nothing for the knote to be attached to. In that case, + * we wont be able to find the process from its pid. But the exit + * code may still be processing the knote list for the target process. + * We may have to wait for that processing to complete before we can + * return (and presumably free the knote) without actually removing + * it from the dead process' knote list. 
*/ static void filt_procdetach(struct knote *kn) @@ -446,66 +449,81 @@ filt_procdetach(struct knote *kn) funnel_state = thread_funnel_set(kernel_flock, TRUE); p = pfind(kn->kn_id); - if (p != (struct proc *)NULL) + if (p != (struct proc *)NULL) { KNOTE_DETACH(&p->p_klist, kn); - + } else if (kn->kn_hookid != 0) { /* if not NOTE_EXIT yet */ + kn->kn_hookid = -1; /* we are detaching but... */ + assert_wait(&kn->kn_hook, THREAD_UNINT); /* have to wait */ + thread_block(THREAD_CONTINUE_NULL); + } thread_funnel_set(kernel_flock, funnel_state); } static int filt_proc(struct knote *kn, long hint) { - u_int event; - int funnel_state; - funnel_state = thread_funnel_set(kernel_flock, TRUE); + if (hint != 0) { + u_int event; - /* - * mask off extra data - */ - event = (u_int)hint & NOTE_PCTRLMASK; + /* must hold the funnel when coming from below */ + assert(thread_funnel_get() != (funnel_t)0); - /* - * if the user is interested in this event, record it. - */ - if (kn->kn_sfflags & event) - kn->kn_fflags |= event; + /* + * mask off extra data + */ + event = (u_int)hint & NOTE_PCTRLMASK; - /* - * process is gone, so flag the event as finished. - */ - if (event == NOTE_EXIT) { - kn->kn_flags |= (EV_EOF | EV_ONESHOT); - thread_funnel_set(kernel_flock, funnel_state); - return (1); - } + /* + * if the user is interested in this event, record it. + */ + if (kn->kn_sfflags & event) + kn->kn_fflags |= event; - /* - * process forked, and user wants to track the new process, - * so attach a new knote to it, and immediately report an - * event with the parent's pid. - */ - if ((event == NOTE_FORK) && (kn->kn_sfflags & NOTE_TRACK)) { - struct kevent kev; - int error; + /* + * process is gone, so flag the event as finished. + * + * If someone was trying to detach, but couldn't + * find the proc to complete the detach, wake them + * up (nothing will ever need to walk the per-proc + * knote list again - so its safe for them to dump + * the knote now). 
+ */ + if (event == NOTE_EXIT) { + boolean_t detaching = (kn->kn_hookid == -1); + + kn->kn_hookid = 0; + kn->kn_flags |= (EV_EOF | EV_ONESHOT); + if (detaching) + thread_wakeup(&kn->kn_hookid); + return (1); + } /* - * register knote with new process. + * process forked, and user wants to track the new process, + * so attach a new knote to it, and immediately report an + * event with the parent's pid. */ - kev.ident = hint & NOTE_PDATAMASK; /* pid */ - kev.filter = kn->kn_filter; - kev.flags = kn->kn_flags | EV_ADD | EV_ENABLE | EV_FLAG1; - kev.fflags = kn->kn_sfflags; - kev.data = kn->kn_id; /* parent */ - kev.udata = kn->kn_kevent.udata; /* preserve udata */ - error = kevent_register(kn->kn_kq, &kev, NULL); - if (error) - kn->kn_fflags |= NOTE_TRACKERR; + if ((event == NOTE_FORK) && (kn->kn_sfflags & NOTE_TRACK)) { + struct kevent kev; + int error; + + /* + * register knote with new process. + */ + kev.ident = hint & NOTE_PDATAMASK; /* pid */ + kev.filter = kn->kn_filter; + kev.flags = kn->kn_flags | EV_ADD | EV_ENABLE | EV_FLAG1; + kev.fflags = kn->kn_sfflags; + kev.data = kn->kn_id; /* parent */ + kev.udata = kn->kn_kevent.udata; /* preserve udata */ + error = kevent_register(kn->kn_kq, &kev, NULL); + if (error) + kn->kn_fflags |= NOTE_TRACKERR; + } } - event = kn->kn_fflags; - thread_funnel_set(kernel_flock, funnel_state); - return (event != 0); + return (kn->kn_fflags != 0); /* atomic check - no funnel needed from above */ } /* @@ -1947,13 +1965,11 @@ knote_init(void) /* allocate kq lock group attribute and group */ kq_lck_grp_attr= lck_grp_attr_alloc_init(); - lck_grp_attr_setstat(kq_lck_grp_attr); kq_lck_grp = lck_grp_alloc_init("kqueue", kq_lck_grp_attr); /* Allocate kq lock attribute */ kq_lck_attr = lck_attr_alloc_init(); - lck_attr_setdefault(kq_lck_attr); /* Initialize the timer filter lock */ lck_mtx_init(&_filt_timerlock, kq_lck_grp, kq_lck_attr); @@ -2047,7 +2063,6 @@ kern_event_init(void) * allocate the lock attribute for mutexes */ evt_mtx_attr = 
lck_attr_alloc_init(); - lck_attr_setdefault(evt_mtx_attr); evt_mutex = lck_mtx_alloc_init(evt_mtx_grp, evt_mtx_attr); if (evt_mutex == NULL) return (ENOMEM); @@ -2272,4 +2287,23 @@ kev_control(struct socket *so, +int +fill_kqueueinfo(struct kqueue *kq, struct kqueue_info * kinfo) +{ + struct stat * st; + + /* No need for the funnel as fd is kept alive */ + + st = &kinfo->kq_stat; + + st->st_size = kq->kq_count; + st->st_blksize = sizeof(struct kevent); + st->st_mode = S_IFIFO; + if (kq->kq_state & KQ_SEL) + kinfo->kq_state |= PROC_KQUEUE_SELECT; + if (kq->kq_state & KQ_SLEEP) + kinfo->kq_state |= PROC_KQUEUE_SLEEP; + + return(0); +} diff --git a/bsd/kern/kern_exec.c b/bsd/kern/kern_exec.c index e3ed77f2c..2d722a51b 100644 --- a/bsd/kern/kern_exec.c +++ b/bsd/kern/kern_exec.c @@ -166,18 +166,11 @@ static kern_return_t create_unix_stack(vm_map_t map, user_addr_t user_stack, static int copyoutptr(user_addr_t ua, user_addr_t ptr, int ptr_size); /* XXX forward; should be in headers, but can't be for one reason or another */ -extern int grade_binary(cpu_type_t exectype, cpu_subtype_t execsubtype); extern void vfork_return(thread_t th_act, struct proc * p, struct proc *p2, register_t *retval); - -extern char classichandler[32]; -extern uint32_t classichandler_fsid; -extern long classichandler_fileid; - - /* * exec_add_string * @@ -284,6 +277,75 @@ exec_save_path(struct image_params *imgp, user_addr_t path, /*uio_seg*/int seg) return(error); } +#ifdef IMGPF_POWERPC +/* + * exec_powerpc32_imgact + * + * Implicitly invoke the PowerPC handler for a byte-swapped image magic + * number. This may happen either as a result of an attempt to invoke a + * PowerPC image directly, or indirectly as the interpreter used in an + * interpreter script. 
+ * + * Parameters; struct image_params * image parameter block + * + * Returns: -1 not an PowerPC image (keep looking) + * -3 Success: exec_archhandler_ppc: relookup + * >0 Failure: exec_archhandler_ppc: error number + * + * Note: This image activator does not handle the case of a direct + * invocation of the exec_archhandler_ppc, since in that case, the + * exec_archhandler_ppc itself is not a PowerPC binary; instead, + * binary image activators must recognize the exec_archhandler_ppc; + * This is managed in exec_check_permissions(). + * + * Note: This image activator is limited to 32 bit powerpc images; + * if support for 64 bit powerpc images is desired, it would + * be more in line with this design to write a separate 64 bit + * image activator. + */ +static int +exec_powerpc32_imgact(struct image_params *imgp) +{ + struct mach_header *mach_header = (struct mach_header *)imgp->ip_vdata; + int error; + size_t len = 0; + + /* + * Make sure it's a PowerPC binary. If we've already redirected + * from an interpreted file once, don't do it again. + */ + if (mach_header->magic != MH_CIGAM) + return (-1); + + /* If there is no exec_archhandler_ppc, we can't run it */ + if (exec_archhandler_ppc.path[0] == 0) + return (EBADARCH); + + /* + * The PowerPC flag will be set by the exec_check_permissions() + * call anyway; however, we set this flag here so that the relookup + * in execve() does not follow symbolic links, as a side effect. + */ + imgp->ip_flags |= IMGPF_POWERPC; + + /* impute an interpreter */ + error = copystr(exec_archhandler_ppc.path, imgp->ip_interp_name, + IMG_SHSIZE, &len); + if (error) + return (error); + + /* + * provide a replacement string for p->p_comm; we have to use an + * an alternate buffer for this, rather than replacing it directly, + * since the exec may fail and return to the parent. In that case, + * we would have erroneously changed the parent p->p_comm instead. 
+ */ + strncpy(imgp->ip_p_comm, imgp->ip_ndp->ni_cnd.cn_nameptr, MAXCOMLEN); + imgp->ip_p_comm[MAXCOMLEN] = '\0'; + + return (-3); +} +#endif /* IMGPF_POWERPC */ /* @@ -291,7 +353,7 @@ exec_save_path(struct image_params *imgp, user_addr_t path, /*uio_seg*/int seg) * * Image activator for interpreter scripts. If the image begins with the * characters "#!", then it is an interpreter script. Verify that we are - * not already executing in Classic mode, and that the length of the script + * not already executing in PowerPC mode, and that the length of the script * line indicating the interpreter is not in excess of the maximum allowed * size. If this is the case, then break out the arguments, if any, which * are separated by white space, and copy them into the argument save area @@ -319,8 +381,8 @@ exec_shell_imgact(struct image_params *imgp) * Make sure it's a shell script. If we've already redirected * from an interpreted file once, don't do it again. * - * Note: We disallow Classic, since the expectation is that we - * may run a Classic interpreter, but not an interpret a Classic + * Note: We disallow PowerPC, since the expectation is that we + * may run a PowerPC interpreter, but not an interpret a PowerPC * image. This is consistent with historical behaviour. 
*/ if (vdata[0] != '#' || @@ -329,6 +391,10 @@ exec_shell_imgact(struct image_params *imgp) return (-1); } +#ifdef IMGPF_POWERPC + if ((imgp->ip_flags & IMGPF_POWERPC) != 0) + return (EBADARCH); +#endif /* IMGPF_POWERPC */ imgp->ip_flags |= IMGPF_INTERPRET; @@ -485,9 +551,12 @@ exec_mach_imgact(struct image_params *imgp) vm_map_t old_map = VM_MAP_NULL; vm_map_t map; boolean_t clean_regions = FALSE; - shared_region_mapping_t initial_region = NULL; load_return_t lret; load_result_t load_result; + shared_region_mapping_t shared_region, initial_region; +#ifdef IMGPF_POWERPC + int powerpcParent, powerpcImage; +#endif /* IMGPF_POWERPC */ /* * make sure it's a Mach-O 1.0 or Mach-O 2.0 binary; the difference @@ -536,11 +605,25 @@ exec_mach_imgact(struct image_params *imgp) imgp->ip_strendp[2] = 0; imgp->ip_strendp += (((imgp->ip_strendp - imgp->ip_strings) + NBPW-1) & ~(NBPW-1)); +#ifdef IMGPF_POWERPC + /* + * XXX + * + * Should be factored out; this is here because we might be getting + * invoked this way as the result of a shell script, and the check + * in exec_check_permissions() is not interior to the jump back up + * to the "encapsulated_binary:" label in execve(). + */ + if (imgp->ip_vattr->va_fsid == exec_archhandler_ppc.fsid && + imgp->ip_vattr->va_fileid == (uint64_t)((u_long)exec_archhandler_ppc.fileid)) { + imgp->ip_flags |= IMGPF_POWERPC; + } +#endif /* IMGPF_POWERPC */ if (vfexec) { kern_return_t result; - result = task_create_internal(task, FALSE, &new_task); + result = task_create_internal(task, FALSE, (imgp->ip_flags & IMGPF_IS_64BIT), &new_task); if (result != KERN_SUCCESS) printf("execve: task_create failed. 
Code: 0x%x\n", result); p->task = new_task; @@ -548,6 +631,12 @@ exec_mach_imgact(struct image_params *imgp) if (p->p_nice != 0) resetpriority(p); map = get_task_map(new_task); + + if (imgp->ip_flags & IMGPF_IS_64BIT) + vm_map_set_64bit(map); + else + vm_map_set_32bit(map); + result = thread_create(new_task, &imgp->ip_vfork_thread); if (result != KERN_SUCCESS) printf("execve: thread_create failed. Code: 0x%x\n", result); @@ -583,7 +672,94 @@ if((imgp->ip_flags & IMGPF_IS_64BIT) == 0) vm_get_shared_region(task, &initial_region); - +#ifdef IMGPF_POWERPC + /* + * If we are transitioning to/from powerpc, then we need to do extra + * work here. + */ + powerpcParent = (p->p_flag & P_TRANSLATED) ? 1 : 0; + powerpcImage = (imgp->ip_flags & IMGPF_POWERPC) ? 1 : 0; + + if (powerpcParent ^ powerpcImage) { + cpu_type_t cpu = (powerpcImage ? CPU_TYPE_POWERPC : cpu_type()); + struct vnode *rootDir = p->p_fd->fd_rdir; + + shared_region = lookup_default_shared_region((int)rootDir, cpu); + if (shared_region == NULL) { + shared_region_mapping_t old_region; + shared_region_mapping_t new_region; + vm_get_shared_region(current_task(), &old_region); + /* grrrr... 
this sets current_task(), not task + * -- they're different (usually) + */ + shared_file_boot_time_init((int)rootDir,cpu); + if ( current_task() != task ) { + vm_get_shared_region(current_task(),&new_region); + vm_set_shared_region(task,new_region); + vm_set_shared_region(current_task(),old_region); + } + } else { + vm_set_shared_region(task, shared_region); + } + shared_region_mapping_dealloc(initial_region); + } else +#endif /* IMGPF_POWERPC */ + + { + struct shared_region_task_mappings map_info; + shared_region_mapping_t next; + + shared_region_mapping_info(initial_region, + &map_info.text_region, + &map_info.text_size, + &map_info.data_region, + &map_info.data_size, + &map_info.region_mappings, + &map_info.client_base, + &map_info.alternate_base, + &map_info.alternate_next, + &map_info.fs_base, + &map_info.system, + &map_info.flags, + &next); + if (map_info.flags & SHARED_REGION_STANDALONE) { + /* + * We were using a private shared region. + * Try and get back to a system-wide shared region + * with matching "fs_base" (for chroot) and "system" + * (for CPU type). + */ + shared_region = lookup_default_shared_region( + map_info.fs_base, + map_info.system); + if (shared_region == NULL) { + /* + * No system-wide default regions, stick to + * our private region... + */ + } else { + SHARED_REGION_TRACE( + SHARED_REGION_TRACE_INFO, + ("shared_region: %p [%d(%s)] " + "exec(\"%s\"): " + "moving from private %p[%x,%x,%x] " + "to default %p\n", + current_thread(), + p->p_pid, p->p_comm, + (imgp->ip_p_comm[0] ? 
+ imgp->ip_p_comm : + imgp->ip_ndp->ni_cnd.cn_nameptr), + initial_region, + map_info.fs_base, + map_info.system, + map_info.flags, + shared_region)); + vm_set_shared_region(task, shared_region); + shared_region_mapping_dealloc(initial_region); + } + } + } + /* * NOTE: An error after this point indicates we have potentially * destroyed or overwrote some process state while attempting an @@ -594,6 +770,7 @@ if((imgp->ip_flags & IMGPF_IS_64BIT) == 0) * We reset the task to 64-bit (or not) here. It may have picked up * a new map, and we need that to reflect its true 64-bit nature. */ + task_set_64bit(task, ((imgp->ip_flags & IMGPF_IS_64BIT) == IMGPF_IS_64BIT)); @@ -634,7 +811,6 @@ if((imgp->ip_flags & IMGPF_IS_64BIT) == 0) } if (vfexec) { - uthread->uu_ar0 = (void *)get_user_regs(thread); old_map = vm_map_switch(get_task_map(task)); } @@ -662,10 +838,15 @@ if((imgp->ip_flags & IMGPF_IS_64BIT) == 0) /* Adjust the stack */ if (imgp->ip_flags & IMGPF_IS_64BIT) { ap = thread_adjuserstack(thread, -8); - (void)copyoutptr(load_result.mach_header, ap, 8); + error = copyoutptr(load_result.mach_header, ap, 8); } else { ap = thread_adjuserstack(thread, -4); - (void)suword(ap, load_result.mach_header); + error = suword(ap, load_result.mach_header); + } + if (error) { + if (vfexec) + vm_map_switch(old_map); + goto badtoolate; } } @@ -720,32 +901,42 @@ if((imgp->ip_flags & IMGPF_IS_64BIT) == 0) p->p_comm[imgp->ip_ndp->ni_cnd.cn_namelen] = '\0'; } - { - /* This is for kdebug */ - long dbg_arg1, dbg_arg2, dbg_arg3, dbg_arg4; - - /* Collect the pathname for tracing */ - kdbg_trace_string(p, &dbg_arg1, &dbg_arg2, &dbg_arg3, &dbg_arg4); - - - - if (vfexec) - { - KERNEL_DEBUG_CONSTANT1((TRACEDBG_CODE(DBG_TRACE_DATA, 2)) | DBG_FUNC_NONE, - p->p_pid ,0,0,0, (unsigned int)thread); - KERNEL_DEBUG_CONSTANT1((TRACEDBG_CODE(DBG_TRACE_STRING, 2)) | DBG_FUNC_NONE, - dbg_arg1, dbg_arg2, dbg_arg3, dbg_arg4, (unsigned int)thread); - } - else - { - KERNEL_DEBUG_CONSTANT((TRACEDBG_CODE(DBG_TRACE_DATA, 
2)) | DBG_FUNC_NONE, - p->p_pid ,0,0,0,0); - KERNEL_DEBUG_CONSTANT((TRACEDBG_CODE(DBG_TRACE_STRING, 2)) | DBG_FUNC_NONE, - dbg_arg1, dbg_arg2, dbg_arg3, dbg_arg4, 0); - } + if (kdebug_enable) { + long dbg_arg1, dbg_arg2, dbg_arg3, dbg_arg4; + + /* + * Collect the pathname for tracing + */ + kdbg_trace_string(p, &dbg_arg1, &dbg_arg2, &dbg_arg3, &dbg_arg4); + + if (vfexec) + { + KERNEL_DEBUG_CONSTANT1((TRACEDBG_CODE(DBG_TRACE_DATA, 2)) | DBG_FUNC_NONE, + p->p_pid ,0,0,0, (unsigned int)thread); + KERNEL_DEBUG_CONSTANT1((TRACEDBG_CODE(DBG_TRACE_STRING, 2)) | DBG_FUNC_NONE, + dbg_arg1, dbg_arg2, dbg_arg3, dbg_arg4, (unsigned int)thread); + } + else + { + KERNEL_DEBUG_CONSTANT((TRACEDBG_CODE(DBG_TRACE_DATA, 2)) | DBG_FUNC_NONE, + p->p_pid ,0,0,0,0); + KERNEL_DEBUG_CONSTANT((TRACEDBG_CODE(DBG_TRACE_STRING, 2)) | DBG_FUNC_NONE, + dbg_arg1, dbg_arg2, dbg_arg3, dbg_arg4, 0); + } } - p->p_flag &= ~P_CLASSIC; +#ifdef IMGPF_POWERPC + /* + * Mark the process as powerpc or not. If powerpc, set the affinity + * flag, which will be used for grading binaries in future exec's + * from the process. + */ + if (((imgp->ip_flags & IMGPF_POWERPC) != 0)) + p->p_flag |= P_TRANSLATED; + else +#endif /* IMGPF_POWERPC */ + p->p_flag &= ~P_TRANSLATED; + p->p_flag &= ~P_AFFINITY; /* * mark as execed, wakeup the process that vforked (if any) and tell @@ -789,6 +980,9 @@ struct execsw { } execsw[] = { { exec_mach_imgact, "Mach-o Binary" }, { exec_fat_imgact, "Fat Binary" }, +#ifdef IMGPF_POWERPC + { exec_powerpc32_imgact, "PowerPC binary" }, +#endif /* IMGPF_POWERPC */ { exec_shell_imgact, "Interpreter Script" }, { NULL, NULL} }; @@ -813,7 +1007,7 @@ execve(struct proc *p, struct execve_args *uap, register_t *retval) int numthreads; int vfexec=0; int once = 1; /* save SGUID-ness for interpreted files */ - char alt_p_comm[sizeof(p->p_comm)] = {0}; /* for Classic */ + char alt_p_comm[sizeof(p->p_comm)] = {0}; /* for PowerPC */ int is_64 = IS_64BIT_PROCESS(p); int seg = (is_64 ? 
UIO_USERSPACE64 : UIO_USERSPACE32); struct vfs_context context; @@ -834,7 +1028,7 @@ execve(struct proc *p, struct execve_args *uap, register_t *retval) imgp->ip_vfs_context = &context; imgp->ip_flags = (is_64 ? IMGPF_WAS_64BIT : IMGPF_NONE); imgp->ip_tws_cache_name = NULL; - imgp->ip_p_comm = alt_p_comm; /* for Classic */ + imgp->ip_p_comm = alt_p_comm; /* for PowerPC */ /* * XXXAUDIT: Currently, we only audit the pathname of the binary. @@ -935,6 +1129,15 @@ execve(struct proc *p, struct execve_args *uap, register_t *retval) nd.ni_cnd.cn_flags = (nd.ni_cnd.cn_flags & HASBUF) | (FOLLOW | LOCKLEAF); +#ifdef IMGPF_POWERPC + /* + * PowerPC does not follow symlinks because the + * code which sets exec_archhandler_ppc.fsid and + * exec_archhandler_ppc.fileid doesn't follow them. + */ + if (imgp->ip_flags & IMGPF_POWERPC) + nd.ni_cnd.cn_flags &= ~FOLLOW; +#endif /* IMGPF_POWERPC */ nd.ni_segflg = UIO_SYSSPACE32; nd.ni_dirp = CAST_USER_ADDR_T(imgp->ip_interp_name); @@ -1090,6 +1293,27 @@ exec_copyout_strings(struct image_params *imgp, user_addr_t *stackp) stack = *stackp; + unsigned patharea_len = imgp->ip_argv - imgp->ip_strings; + int envc_add = 0; + +#ifdef IMGPF_POWERPC + /* + * oah750 expects /usr/lib/dyld\0 as the start of the program name. + * It also expects to have a certain environment variable set to 0. + * 50 bytes for each to ensure we have enough space without having + * to count every byte. 
+ */ + char *progname, *envvar; + char progname_str[] = "/usr/lib/dyld"; + char envvar_str[] = "OAH750_CFG_FU_STACK_SIZE=0"; + + if (imgp->ip_flags & IMGPF_POWERPC) { + progname = progname_str; + envvar = envvar_str; + patharea_len += strlen(progname) + strlen(envvar) + 2; + envc_add = 1; + } +#endif /* IMGPF_POWERPC */ /* * Set up pointers to the beginning of the string area, the beginning * of the path area, and the beginning of the pointer area (actually, @@ -1097,8 +1321,8 @@ exec_copyout_strings(struct image_params *imgp, user_addr_t *stackp) * but we use ptr_size worth of space for it, for alignment). */ string_area = stack - (((imgp->ip_strendp - imgp->ip_strings) + ptr_size-1) & ~(ptr_size-1)) - ptr_size; - path_area = string_area - (((imgp->ip_argv - imgp->ip_strings) + ptr_size-1) & ~(ptr_size-1)); - ptr_area = path_area - ((imgp->ip_argc + imgp->ip_envc + 4) * ptr_size) - ptr_size /*argc*/; + path_area = string_area - ((patharea_len + ptr_size-1) & ~(ptr_size-1)); + ptr_area = path_area - ((imgp->ip_argc + imgp->ip_envc + 4 + envc_add) * ptr_size) - ptr_size /*argc*/; /* Return the initial stack address: the location of argc */ *stackp = ptr_area; @@ -1117,8 +1341,20 @@ exec_copyout_strings(struct image_params *imgp, user_addr_t *stackp) * copy it just before the string area. 
*/ len = 0; +#ifdef IMGPF_POWERPC + if (imgp->ip_flags & IMGPF_POWERPC) { + error = copyoutstr(progname, path_area, + patharea_len, + (size_t *)&len); + if (error) + goto bad; + error = copyoutstr(imgp->ip_strings, path_area + strlen(progname) + 1, + patharea_len, + (size_t *)&len); + } else +#endif /* IMGPF_POWERPC */ error = copyoutstr(imgp->ip_strings, path_area, - (unsigned)(imgp->ip_argv - imgp->ip_strings), + patharea_len, (size_t *)&len); if (error) goto bad; @@ -1161,6 +1397,27 @@ exec_copyout_strings(struct image_params *imgp, user_addr_t *stackp) /* argv[n] = NULL */ (void)copyoutptr(0LL, ptr_area, ptr_size); ptr_area += ptr_size; +#ifdef IMGPF_POWERPC + if (envc_add) { + (void)copyoutptr(string_area, ptr_area, ptr_size); + + do { + if (strspace <= 0) { + error = E2BIG; + break; + } + error = copyoutstr(envvar, string_area, + (unsigned)strspace, + (size_t *)&len); + string_area += len; + envvar += len; + strspace -= len; + } while (error == ENAMETOOLONG); + if (error == EFAULT || error == E2BIG) + break; + ptr_area += ptr_size; + } +#endif /* IMGPF_POWERPC */ } if (--stringc < 0) break; @@ -1355,6 +1612,17 @@ exec_check_permissions(struct image_params *imgp) if (vp->v_writecount) return (ETXTBSY); +#ifdef IMGPF_POWERPC + /* + * If the file we are about to attempt to load is the exec_handler_ppc, + * which is determined by matching the vattr fields against previously + * cached values, then we set the PowerPC environment flag. 
+ */ + if (vap->va_fsid == exec_archhandler_ppc.fsid && + vap->va_fileid == (uint64_t)((u_long)exec_archhandler_ppc.fileid)) { + imgp->ip_flags |= IMGPF_POWERPC; + } +#endif /* IMGPF_POWERPC */ /* XXX May want to indicate to underlying FS that vnode is open */ diff --git a/bsd/kern/kern_fork.c b/bsd/kern/kern_fork.c index ed6597a1d..0215a91a5 100644 --- a/bsd/kern/kern_fork.c +++ b/bsd/kern/kern_fork.c @@ -252,9 +252,9 @@ procdup(struct proc *child, struct proc *parent) kern_return_t result; if (parent->task == kernel_task) - result = task_create_internal(TASK_NULL, FALSE, &task); + result = task_create_internal(TASK_NULL, FALSE, FALSE, &task); else - result = task_create_internal(parent->task, TRUE, &task); + result = task_create_internal(parent->task, TRUE, (parent->p_flag & P_LP64), &task); if (result != KERN_SUCCESS) printf("fork/procdup: task_create failed. Code: 0x%x\n", result); child->task = task; @@ -262,15 +262,25 @@ procdup(struct proc *child, struct proc *parent) set_bsdtask_info(task, child); if (parent->p_flag & P_LP64) { task_set_64bit(task, TRUE); + vm_map_set_64bit(get_task_map(task)); child->p_flag |= P_LP64; -#ifdef __PPC__ /* LP64todo - clean up this hacked mapping of commpage */ pmap_map_sharedpage(task, get_map_pmap(get_task_map(task))); vm_map_commpage64(get_task_map(task)); -#endif /* __PPC__ */ } else { task_set_64bit(task, FALSE); + vm_map_set_32bit(get_task_map(task)); child->p_flag &= ~P_LP64; +#ifdef __i386__ + /* + * On Intel, the comm page doesn't get mapped automatically + * because it goes beyond the end of the VM map in the current + * 3GB/1GB address space model. + * XXX This explicit mapping will probably become unnecessary + * when we switch to the new 4GB/4GB address space model. + */ + vm_map_commpage32(get_task_map(task)); +#endif /* __i386__ */ } if (child->p_nice != 0) resetpriority(child); @@ -494,7 +504,7 @@ forkproc(p1, lock) * Increase reference counts on shared objects. 
* The p_stats and p_sigacts substructs are set in vm_fork. */ - p2->p_flag = (p1->p_flag & (P_LP64 | P_CLASSIC | P_AFFINITY)); + p2->p_flag = (p1->p_flag & (P_LP64 | P_TRANSLATED | P_AFFINITY)); if (p1->p_flag & P_PROFIL) startprofclock(p2); /* diff --git a/bsd/kern/kern_ktrace.c b/bsd/kern/kern_ktrace.c index c77a03c90..5f3794b7e 100644 --- a/bsd/kern/kern_ktrace.c +++ b/bsd/kern/kern_ktrace.c @@ -107,7 +107,7 @@ void ktrsyscall(p, code, narg, args) struct proc *p; int code, narg; - u_int64_t args[]; + syscall_arg_t args[]; { #if KTRACE struct vnode *vp; diff --git a/bsd/kern/kern_malloc.c b/bsd/kern/kern_malloc.c index 5ae60405a..3a61d1338 100644 --- a/bsd/kern/kern_malloc.c +++ b/bsd/kern/kern_malloc.c @@ -81,6 +81,7 @@ #include #include #include +#include #include diff --git a/bsd/kern/kern_mib.c b/bsd/kern/kern_mib.c index 11d967a42..adbd53608 100644 --- a/bsd/kern/kern_mib.c +++ b/bsd/kern/kern_mib.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2006 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -92,6 +92,7 @@ #include #include #include +#include extern vm_map_t bsd_pageable_map; @@ -104,7 +105,15 @@ extern vm_map_t bsd_pageable_map; #include #include -static int cputype, cpusubtype, cputhreadtype; +#ifdef __i386__ +#include /* for cpuid_info() */ +#endif + +#ifndef MAX +#define MAX(a,b) (a >= b ? a : b) +#endif + +static int cputype, cpusubtype, cputhreadtype, cpufamily, cacheconfig[10];; SYSCTL_NODE(, 0, sysctl, CTLFLAG_RW, 0, "Sysctl internal magic"); @@ -265,8 +274,10 @@ sysctl_hw_generic SYSCTL_HANDLER_ARGS if (epochTemp == -1) return(EINVAL); return(SYSCTL_RETURN(req, epochTemp)); - case HW_VECTORUNIT: - return(SYSCTL_RETURN(req, cpu_info.vector_unit)); + case HW_VECTORUNIT: { + int vector = cpu_info.vector_unit == 0? 
0 : 1; + return(SYSCTL_RETURN(req, vector)); + } case HW_L2SETTINGS: if (cpu_info.l2_cache_size == 0xFFFFFFFF) return(EINVAL); @@ -299,6 +310,8 @@ SYSCTL_PROC (_hw, OID_AUTO, logicalcpu_max, CTLTYPE_INT | CTLFLAG_RD | CTLFLA SYSCTL_INT (_hw, HW_BYTEORDER, byteorder, CTLFLAG_RD | CTLFLAG_KERN, NULL, BYTE_ORDER, ""); SYSCTL_INT (_hw, OID_AUTO, cputype, CTLFLAG_RD | CTLFLAG_KERN, &cputype, 0, ""); SYSCTL_INT (_hw, OID_AUTO, cpusubtype, CTLFLAG_RD | CTLFLAG_KERN, &cpusubtype, 0, ""); +SYSCTL_INT (_hw, OID_AUTO, cpufamily, CTLFLAG_RD | CTLFLAG_KERN, &cpufamily, 0, ""); +SYSCTL_OPAQUE (_hw, OID_AUTO, cacheconfig, CTLFLAG_RD, &cacheconfig, sizeof(cacheconfig), "I", ""); SYSCTL_INT2QUAD(_hw, OID_AUTO, pagesize, CTLFLAG_RD | CTLFLAG_KERN, &page_size, ""); SYSCTL_QUAD (_hw, OID_AUTO, busfrequency, CTLFLAG_RD | CTLFLAG_KERN, &gPEClockFrequencyInfo.bus_frequency_hz, ""); SYSCTL_QUAD (_hw, OID_AUTO, busfrequency_min, CTLFLAG_RD | CTLFLAG_KERN, &gPEClockFrequencyInfo.bus_frequency_min_hz, ""); @@ -356,6 +369,29 @@ SYSCTL_PROC(_hw, HW_VECTORUNIT, vectorunit, CTLTYPE_INT | CTLFLAG_RD | CTLFLAG SYSCTL_PROC(_hw, HW_L2SETTINGS, l2settings, CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MASKED, 0, HW_L2SETTINGS, sysctl_hw_generic, "I", ""); SYSCTL_PROC(_hw, HW_L3SETTINGS, l3settings, CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MASKED, 0, HW_L3SETTINGS, sysctl_hw_generic, "I", ""); +/* + * Debugging interface to the CPU power management code. 
+ */ +static void pmsSysctl SYSCTL_HANDLER_ARGS { + + pmsctl_t ctl; + int error; + boolean_t intr; + + if ((error = SYSCTL_IN(req, &ctl, sizeof(ctl)))) + return(error); + + intr = ml_set_interrupts_enabled(FALSE); /* No interruptions in here */ + error = pmsControl(ctl.request, (user_addr_t)ctl.reqaddr, ctl.reqsize); + (void)ml_set_interrupts_enabled(intr); /* Restore interruptions */ + + return(error); +} + +SYSCTL_PROC(_hw, OID_AUTO, pms, CTLTYPE_STRUCT | CTLFLAG_WR, 0, 0, pmsSysctl, "S", "Processor Power Management"); + + + /****************************************************************************** * Generic MIB initialisation. * @@ -461,7 +497,78 @@ sysctl_mib_init(void) if (dcbtstreams_flag >= 0) sysctl_register_oid(&sysctl__hw_optional_dcbtstreams); } -#else + + /* hw.cpufamily */ + switch (cpusubtype) { + case CPU_SUBTYPE_POWERPC_750: + cpufamily = CPUFAMILY_POWERPC_G3; + break; + case CPU_SUBTYPE_POWERPC_7400: + case CPU_SUBTYPE_POWERPC_7450: + cpufamily = CPUFAMILY_POWERPC_G4; + break; + case CPU_SUBTYPE_POWERPC_970: + cpufamily = CPUFAMILY_POWERPC_G5; + break; + default: + cpufamily = CPUFAMILY_UNKNOWN; + } + + /* hw.cacheconfig */ + cacheconfig[0] = 0; /* XXX not supported on PowerPC */ + +#elif defined (__i386__) + + +#define DECLARE_X86_HW_OPTIONAL_FLAGS(named, BITS, flags) { \ + static int named##_flag = -1; \ + static SYSCTL_INT(_hw_optional, OID_AUTO, named, CTLFLAG_RD | CTLFLAG_NOAUTO | CTLFLAG_KERN | flags, &named##_flag, 0, ""); \ + named##_flag = ((_get_cpu_capabilities() & BITS) == BITS)? 
1 : 0; \ + sysctl_register_oid(&sysctl__hw_optional_##named); \ + } + +#define DECLARE_X86_HW_OPTIONAL(named, BITS) \ + DECLARE_X86_HW_OPTIONAL_FLAGS(named, BITS, 0) + +#define DECLARE_X86_HW_OPTIONAL_MASKED(named, BITS) \ + DECLARE_X86_HW_OPTIONAL_FLAGS(named, BITS, CTLFLAG_MASKED) + + DECLARE_X86_HW_OPTIONAL(mmx, kHasMMX); + DECLARE_X86_HW_OPTIONAL(sse, kHasSSE); + DECLARE_X86_HW_OPTIONAL(sse2, kHasSSE2); + DECLARE_X86_HW_OPTIONAL(sse3, kHasSSE3); + if (_get_cpu_capabilities() & k64Bit) + DECLARE_X86_HW_OPTIONAL(x86_64, k64Bit); + if (_get_cpu_capabilities() & kHasSupplementalSSE3) { + DECLARE_X86_HW_OPTIONAL_MASKED(mni, kHasSupplementalSSE3); /* XXX */ + DECLARE_X86_HW_OPTIONAL(supplementalsse3, kHasSupplementalSSE3); + } + + /* hw.cpufamily */ + switch (cpuid_info()->cpuid_family) { + case 6: + switch (cpuid_info()->cpuid_model) { + case 14: + cpufamily = CPUFAMILY_INTEL_6_14; /* Core Solo/Duo */ + break; + case 15: + cpufamily = CPUFAMILY_INTEL_6_15; + break; + default: + cpufamily = CPUFAMILY_UNKNOWN; + } + break; + default: + cpufamily = CPUFAMILY_UNKNOWN; + } + /* hw.cacheconfig */ + cacheconfig[0] = cpuid_info()->cpuid_cores_per_package; + cacheconfig[1] = MAX(cpuid_info()->cache_sharing[L1I], cpuid_info()->cache_sharing[L1D]); + cacheconfig[2] = cpuid_info()->cache_sharing[L2U]; + cacheconfig[3] = cpuid_info()->cache_sharing[L3U]; + cacheconfig[4] = 0; + +#else /* end __i386 */ # warning we do not support this platform yet #endif /* __ppc__ */ diff --git a/bsd/kern/kern_proc.c b/bsd/kern/kern_proc.c index 96ea755f3..f1a0cc880 100644 --- a/bsd/kern/kern_proc.c +++ b/bsd/kern/kern_proc.c @@ -716,7 +716,7 @@ pgrpdump(void) int proc_is_classic(struct proc *p) { - return (p->p_flag & P_CLASSIC) ? 1 : 0; + return (p->p_flag & P_TRANSLATED) ? 1 : 0; } /* XXX Why does this function exist? Need to kill it off... 
*/ diff --git a/bsd/kern/kern_resource.c b/bsd/kern/kern_resource.c index 6ce5a3874..3a8419566 100644 --- a/bsd/kern/kern_resource.c +++ b/bsd/kern/kern_resource.c @@ -97,7 +97,7 @@ rlim_t maxsmap = MAXSSIZ; /* XXX */ * * Note: would be in kern/subr_param.c in FreeBSD. */ -int maxprocperuid = CHILD_MAX; /* max # of procs per user */ +extern int maxprocperuid; /* max # of procs per user */ int maxfilesperproc = OPEN_MAX; /* per-proc open files limit */ SYSCTL_INT( _kern, KERN_MAXPROCPERUID, maxprocperuid, CTLFLAG_RW, diff --git a/bsd/kern/kern_shutdown.c b/bsd/kern/kern_shutdown.c index f2bf82fc4..931b27c3d 100644 --- a/bsd/kern/kern_shutdown.c +++ b/bsd/kern/kern_shutdown.c @@ -141,6 +141,10 @@ boot(paniced, howto, command) if (paniced == RB_PANIC) hostboot_option = HOST_REBOOT_HALT; + if (howto & RB_UPSDELAY) { + hostboot_option = HOST_REBOOT_UPSDELAY; + } + /* * if we're going to power down due to a halt, * give the disks a chance to finish getting diff --git a/bsd/kern/kern_sig.c b/bsd/kern/kern_sig.c index 9b42ea1e2..479b1f5e1 100644 --- a/bsd/kern/kern_sig.c +++ b/bsd/kern/kern_sig.c @@ -606,8 +606,11 @@ execsigs(p, thr_act) { register struct sigacts *ps = p->p_sigacts; register int nc, mask; - struct uthread *ut; + struct uthread *ut = (struct uthread *)0; + if (thr_act){ + ut = (struct uthread *)get_bsdthread_info(thr_act); + } /* * Reset caught signals. 
Held signals remain held * through p_sigmask (unless they were caught, @@ -621,7 +624,6 @@ execsigs(p, thr_act) if (nc != SIGCONT) p->p_sigignore |= mask; if (thr_act){ - ut = (struct uthread *)get_bsdthread_info(thr_act); ut->uu_siglist &= ~mask; p->p_siglist &= ~mask; } else @@ -637,6 +639,13 @@ execsigs(p, thr_act) ps->ps_sigstk.ss_size = 0; ps->ps_sigstk.ss_sp = USER_ADDR_NULL; ps->ps_flags = 0; + if (thr_act) { + ut->uu_sigstk.ss_flags = SA_DISABLE; + ut->uu_sigstk.ss_size = 0; + ut->uu_sigstk.ss_sp = USER_ADDR_NULL; + ut->uu_flag &= ~UT_ALTSTACK; + } + ps->ps_sigonstack = 0; } /* @@ -1067,18 +1076,31 @@ int sigaltstack(struct proc *p, register struct sigaltstack_args *uap, __unused register_t *retval) { struct sigacts *psp; + struct user_sigaltstack *pstk; struct user_sigaltstack ss; + struct uthread *uth; + int uthsigaltstack = 0; int error; + uth = (struct uthread *)get_bsdthread_info(current_thread()); + uthsigaltstack = p->p_lflag & P_LTHSIGSTACK; + psp = p->p_sigacts; - if ((psp->ps_flags & SAS_ALTSTACK) == 0) - psp->ps_sigstk.ss_flags |= SA_DISABLE; + if (uthsigaltstack != 0) { + pstk = &uth->uu_sigstk; + if ((uth->uu_flag & UT_ALTSTACK) == 0) + uth->uu_sigstk.ss_flags |= SA_DISABLE; + } else { + pstk = &psp->ps_sigstk; + if ((psp->ps_flags & SAS_ALTSTACK) == 0) + psp->ps_sigstk.ss_flags |= SA_DISABLE; + } if (uap->oss) { if (IS_64BIT_PROCESS(p)) { - error = copyout(&psp->ps_sigstk, uap->oss, sizeof(struct user_sigaltstack)); + error = copyout(pstk, uap->oss, sizeof(struct user_sigaltstack)); } else { struct sigaltstack ss32; - sigaltstack_64to32(&psp->ps_sigstk, &ss32); + sigaltstack_64to32(pstk, &ss32); error = copyout(&ss32, uap->oss, sizeof(struct sigaltstack)); } if (error) @@ -1100,18 +1122,32 @@ sigaltstack(struct proc *p, register struct sigaltstack_args *uap, __unused regi } if (ss.ss_flags & SA_DISABLE) { - if (psp->ps_sigstk.ss_flags & SA_ONSTACK) - return (EINVAL); - psp->ps_flags &= ~SAS_ALTSTACK; - psp->ps_sigstk.ss_flags = ss.ss_flags; + 
if (uthsigaltstack != 0) { + /* if we are here we are not in the signal handler ;so no need to check */ + if (uth->uu_sigstk.ss_flags & SA_ONSTACK) + return (EINVAL); + uth->uu_flag &= ~UT_ALTSTACK; + uth->uu_sigstk.ss_flags = ss.ss_flags; + } else { + if (psp->ps_sigstk.ss_flags & SA_ONSTACK) + return (EINVAL); + psp->ps_flags &= ~SAS_ALTSTACK; + psp->ps_sigstk.ss_flags = ss.ss_flags; + } + return (0); } /* The older stacksize was 8K, enforce that one so no compat problems */ #define OLDMINSIGSTKSZ 8*1024 if (ss.ss_size < OLDMINSIGSTKSZ) return (ENOMEM); - psp->ps_flags |= SAS_ALTSTACK; - psp->ps_sigstk= ss; + if (uthsigaltstack != 0) { + uth->uu_flag |= UT_ALTSTACK; + uth->uu_sigstk= ss; + } else { + psp->ps_flags |= SAS_ALTSTACK; + psp->ps_sigstk= ss; + } return (0); } @@ -1459,7 +1495,7 @@ psignal_lock(p, signum, withlock) register int signum; register int withlock; { - register int s, prop; + register int prop; register sig_t action; thread_t sig_thread_act; register task_t sig_task; @@ -1494,9 +1530,7 @@ psignal_lock(p, signum, withlock) return; } - s = splhigh(); KNOTE(&p->p_klist, NOTE_SIGNAL | signum); - splx(s); /* * do not send signals to the process that has the thread @@ -2632,12 +2666,15 @@ static int filt_sigattach(struct knote *kn) { struct proc *p = current_proc(); + boolean_t funnel_state; kn->kn_ptr.p_proc = p; kn->kn_flags |= EV_CLEAR; /* automatically set */ - /* XXX lock the proc here while adding to the list? 
*/ + /* Take the funnel to protect the proc while adding to the list */ + funnel_state = thread_funnel_set(kernel_flock, TRUE); KNOTE_ATTACH(&p->p_klist, kn); + thread_funnel_set(kernel_flock, funnel_state); return (0); } @@ -2646,8 +2683,11 @@ static void filt_sigdetach(struct knote *kn) { struct proc *p = kn->kn_ptr.p_proc; + boolean_t funnel_state; + funnel_state = thread_funnel_set(kernel_flock, TRUE); KNOTE_DETACH(&p->p_klist, kn); + thread_funnel_set(kernel_flock, funnel_state); } /* diff --git a/bsd/kern/kern_sysctl.c b/bsd/kern/kern_sysctl.c index dd0736fc9..2b6a8b4a8 100644 --- a/bsd/kern/kern_sysctl.c +++ b/bsd/kern/kern_sysctl.c @@ -102,9 +102,14 @@ extern vm_map_t bsd_pageable_map; #include #include +#include #include +#ifdef __i386__ +#include +#endif + sysctlfn kern_sysctl; #ifdef DEBUG sysctlfn debug_sysctl; @@ -116,10 +121,10 @@ extern sysctlfn cpu_sysctl; extern int aio_max_requests; extern int aio_max_requests_per_process; extern int aio_worker_threads; -extern int maxprocperuid; extern int maxfilesperproc; extern int lowpri_IO_window_msecs; extern int lowpri_IO_delay_msecs; +extern int nx_enabled; static void fill_eproc(struct proc *p, struct eproc *ep); @@ -313,7 +318,7 @@ __sysctl(struct proc *p, struct __sysctl_args *uap, __unused register_t *retval) if (uap->new != USER_ADDR_NULL && ((name[0] == CTL_KERN && !(name[1] == KERN_IPC || name[1] == KERN_PANICINFO || name[1] == KERN_PROCDELAYTERM || - name[1] == KERN_PROC_LOW_PRI_IO)) + name[1] == KERN_PROC_LOW_PRI_IO || name[1] == KERN_PROCNAME || name[1] == KERN_THALTSTACK)) || (name[0] == CTL_HW) || (name[0] == CTL_VM) || (name[0] == CTL_VFS)) @@ -427,9 +432,6 @@ __sysctl(struct proc *p, struct __sysctl_args *uap, __unused register_t *retval) /* * Attributes stored in the kernel. 
*/ -extern char classichandler[32]; -extern uint32_t classichandler_fsid; -extern long classichandler_fileid; __private_extern__ char corefilename[MAXPATHLEN+1]; __private_extern__ int do_coredump; __private_extern__ int sugid_coredump; @@ -468,8 +470,9 @@ sysctl_affinity( return (ENOTSUP); } + static int -sysctl_classic( +sysctl_translate( int *name, u_int namelen, user_addr_t oldBuf, @@ -492,11 +495,60 @@ sysctl_classic( return (EPERM); return sysctl_rdint(oldBuf, oldSize, newBuf, - (p->p_flag & P_CLASSIC) ? 1 : 0); + (p->p_flag & P_TRANSLATED) ? 1 : 0); +} + +int +set_archhandler(struct proc *p, int arch) +{ + int error; + struct nameidata nd; + struct vnode_attr va; + struct vfs_context context; + char *archhandler; + + switch(arch) { + case CPU_TYPE_POWERPC: + archhandler = exec_archhandler_ppc.path; + break; + default: + return (EBADARCH); + } + + context.vc_proc = p; + context.vc_ucred = kauth_cred_get(); + + NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE32, + CAST_USER_ADDR_T(archhandler), &context); + error = namei(&nd); + if (error) + return (error); + nameidone(&nd); + + /* Check mount point */ + if ((nd.ni_vp->v_mount->mnt_flag & MNT_NOEXEC) || + (nd.ni_vp->v_type != VREG)) { + vnode_put(nd.ni_vp); + return (EACCES); + } + + VATTR_INIT(&va); + VATTR_WANTED(&va, va_fsid); + VATTR_WANTED(&va, va_fileid); + error = vnode_getattr(nd.ni_vp, &va, &context); + if (error) { + vnode_put(nd.ni_vp); + return (error); + } + vnode_put(nd.ni_vp); + + exec_archhandler_ppc.fsid = va.va_fsid; + exec_archhandler_ppc.fileid = (u_long)va.va_fileid; + return 0; } static int -sysctl_classichandler( +sysctl_exec_archhandler_ppc( __unused int *name, __unused u_int namelen, user_addr_t oldBuf, @@ -509,18 +561,18 @@ sysctl_classichandler( size_t len; struct nameidata nd; struct vnode_attr va; - char handler[sizeof(classichandler)]; + char handler[sizeof(exec_archhandler_ppc.path)]; struct vfs_context context; context.vc_proc = p; context.vc_ucred = kauth_cred_get(); if 
(oldSize) { - len = strlen(classichandler) + 1; + len = strlen(exec_archhandler_ppc.path) + 1; if (oldBuf) { if (*oldSize < len) return (ENOMEM); - error = copyout(classichandler, oldBuf, len); + error = copyout(exec_archhandler_ppc.path, oldBuf, len); if (error) return (error); } @@ -530,44 +582,26 @@ sysctl_classichandler( error = suser(context.vc_ucred, &p->p_acflag); if (error) return (error); - if (newSize >= sizeof(classichandler)) + if (newSize >= sizeof(exec_archhandler_ppc.path)) return (ENAMETOOLONG); error = copyin(newBuf, handler, newSize); if (error) return (error); handler[newSize] = 0; - - NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE32, - CAST_USER_ADDR_T(handler), &context); - error = namei(&nd); + strcpy(exec_archhandler_ppc.path, handler); + error = set_archhandler(p, CPU_TYPE_POWERPC); if (error) return (error); - nameidone(&nd); - - /* Check mount point */ - if ((nd.ni_vp->v_mount->mnt_flag & MNT_NOEXEC) || - (nd.ni_vp->v_type != VREG)) { - vnode_put(nd.ni_vp); - return (EACCES); - } - - VATTR_INIT(&va); - VATTR_WANTED(&va, va_fsid); - VATTR_WANTED(&va, va_fileid); - error = vnode_getattr(nd.ni_vp, &va, &context); - if (error) { - vnode_put(nd.ni_vp); - return (error); - } - vnode_put(nd.ni_vp); - - classichandler_fsid = va.va_fsid; - classichandler_fileid = (u_long)va.va_fileid; - strcpy(classichandler, handler); } return 0; } +SYSCTL_NODE(_kern, KERN_EXEC, exec, CTLFLAG_RD, 0, ""); + +SYSCTL_NODE(_kern_exec, OID_AUTO, archhandler, CTLFLAG_RD, 0, ""); + +SYSCTL_STRING(_kern_exec_archhandler, OID_AUTO, powerpc, CTLFLAG_RD, + exec_archhandler_ppc.path, 0, ""); extern int get_kernel_symfile( struct proc *, char **); __private_extern__ int @@ -595,9 +629,11 @@ kern_sysctl(int *name, u_int namelen, user_addr_t oldp, size_t *oldlenp, || name[0] == KERN_IPC || name[0] == KERN_SYSV || name[0] == KERN_AFFINITY - || name[0] == KERN_CLASSIC + || name[0] == KERN_TRANSLATE + || name[0] == KERN_EXEC || name[0] == KERN_PANICINFO - || name[0] == 
KERN_POSIX) + || name[0] == KERN_POSIX + || name[0] == KERN_TFP) ) return (ENOTDIR); /* overloaded */ @@ -713,12 +749,12 @@ kern_sysctl(int *name, u_int namelen, user_addr_t oldp, size_t *oldlenp, case KERN_AFFINITY: return sysctl_affinity(name+1, namelen-1, oldp, oldlenp, newp, newlen, p); - case KERN_CLASSIC: - return sysctl_classic(name+1, namelen-1, oldp, oldlenp, - newp, newlen, p); + case KERN_TRANSLATE: + return sysctl_translate(name+1, namelen-1, oldp, oldlenp, newp, + newlen, p); case KERN_CLASSICHANDLER: - return sysctl_classichandler(name+1, namelen-1, oldp, oldlenp, - newp, newlen, p); + return sysctl_exec_archhandler_ppc(name+1, namelen-1, oldp, + oldlenp, newp, newlen, p); case KERN_AIOMAX: return( sysctl_aiomax( oldp, oldlenp, newp, newlen ) ); case KERN_AIOPROCMAX: @@ -845,9 +881,76 @@ kern_sysctl(int *name, u_int namelen, user_addr_t oldp, size_t *oldlenp, } return(error); } + case KERN_NX_PROTECTION: + { + int old_value, new_value; + + error = 0; + if (oldp && *oldlenp < sizeof(old_value) ) + return (ENOMEM); + if ( newp && newlen != sizeof(new_value) ) + return(EINVAL); + *oldlenp = sizeof(old_value); + + old_value = nx_enabled; + + if (oldp && (error = copyout( &old_value, oldp, *oldlenp))) + return(error); +#ifdef __i386__ + /* + * Only allow setting if NX is supported on the chip + */ + if (cpuid_extfeatures() & CPUID_EXTFEATURE_XD) { +#endif + if (error == 0 && newp) + error = copyin(newp, &new_value, + sizeof(newlen)); + if (error == 0 && newp) + nx_enabled = new_value; +#ifdef __i386__ + } else if (newp) { + error = ENOTSUP; + } +#endif + return(error); + } case KERN_SHREG_PRIVATIZABLE: /* this kernel does implement shared_region_make_private_np() */ return (sysctl_rdint(oldp, oldlenp, newp, 1)); + case KERN_PROCNAME: + error = sysctl_trstring(oldp, oldlenp, newp, newlen, + &p->p_name[0], (2*MAXCOMLEN+1)); + return (error); + case KERN_THALTSTACK: + { + int old_value, new_value; + + error = 0; + if (oldp && *oldlenp < sizeof(int)) + return 
(ENOMEM); + if ( newp && newlen != sizeof(int) ) + return(EINVAL); + *oldlenp = sizeof(int); + old_value = (p->p_lflag & P_LTHSIGSTACK)? 1: 0; + if (oldp && (error = copyout( &old_value, oldp, sizeof(int)))) + return(error); + if (error == 0 && newp ) + error = copyin( newp, &new_value, sizeof(int) ); + if (error == 0 && newp) { + if (new_value) { + /* we cannot swich midstream if inuse */ + if ((p->p_sigacts->ps_flags & SAS_ALTSTACK) == SAS_ALTSTACK) + return(EPERM); + p->p_lflag |= P_LTHSIGSTACK; + } else { + /* we cannot swich midstream */ + if ((p->p_lflag & P_LTHSIGSTACK) == P_LTHSIGSTACK) + return(EPERM); + p->p_lflag &= ~P_LTHSIGSTACK; + } + } + return(error); + } default: return (ENOTSUP); } @@ -2089,3 +2192,98 @@ sysctl_maxproc(user_addr_t oldp, size_t *oldlenp, return( error ); } /* sysctl_maxproc */ + +#if __i386__ +static int +sysctl_sysctl_exec_affinity SYSCTL_HANDLER_ARGS +{ + struct proc *cur_proc = req->p; + int error; + + if (req->oldptr != USER_ADDR_NULL) { + cpu_type_t oldcputype = (cur_proc->p_flag & P_AFFINITY) ? 
CPU_TYPE_POWERPC : CPU_TYPE_I386; + if ((error = SYSCTL_OUT(req, &oldcputype, sizeof(oldcputype)))) + return error; + } + + if (req->newptr != USER_ADDR_NULL) { + cpu_type_t newcputype; + if ((error = SYSCTL_IN(req, &newcputype, sizeof(newcputype)))) + return error; + if (newcputype == CPU_TYPE_I386) + cur_proc->p_flag &= ~P_AFFINITY; + else if (newcputype == CPU_TYPE_POWERPC) + cur_proc->p_flag |= P_AFFINITY; + else + return (EINVAL); + } + + return 0; +} +SYSCTL_PROC(_sysctl, OID_AUTO, proc_exec_affinity, CTLTYPE_INT|CTLFLAG_RW|CTLFLAG_ANYBODY, 0, 0, sysctl_sysctl_exec_affinity ,"I","proc_exec_affinity"); +#endif + +static int +fetch_process_cputype( + struct proc *cur_proc, + int *name, + u_int namelen, + cpu_type_t *cputype) +{ + struct proc *p = NULL; + cpu_type_t ret = 0; + + if (namelen == 0) + p = cur_proc; + else if (namelen == 1) { + p = pfind(name[0]); + if (p == NULL) + return (EINVAL); + if ((kauth_cred_getuid(p->p_ucred) != kauth_cred_getuid(kauth_cred_get())) + && suser(kauth_cred_get(), &cur_proc->p_acflag)) + return (EPERM); + } else { + return EINVAL; + } + +#if __i386__ + if (p->p_flag & P_TRANSLATED) { + ret = CPU_TYPE_POWERPC; + } + else +#endif + { + ret = cpu_type(); + if (IS_64BIT_PROCESS(p)) + ret |= CPU_ARCH_ABI64; + } + *cputype = ret; + + return 0; +} + +static int +sysctl_sysctl_native SYSCTL_HANDLER_ARGS +{ + int error; + cpu_type_t proc_cputype = 0; + if ((error = fetch_process_cputype(req->p, (int *)arg1, arg2, &proc_cputype)) != 0) + return error; + int res = 1; + if ((proc_cputype & ~CPU_ARCH_MASK) != (cpu_type() & ~CPU_ARCH_MASK)) + res = 0; + return SYSCTL_OUT(req, &res, sizeof(res)); +} +SYSCTL_PROC(_sysctl, OID_AUTO, proc_native, CTLTYPE_NODE|CTLFLAG_RD, 0, 0, sysctl_sysctl_native ,"I","proc_native"); + +static int +sysctl_sysctl_cputype SYSCTL_HANDLER_ARGS +{ + int error; + cpu_type_t proc_cputype = 0; + if ((error = fetch_process_cputype(req->p, (int *)arg1, arg2, &proc_cputype)) != 0) + return error; + return SYSCTL_OUT(req, 
&proc_cputype, sizeof(proc_cputype)); +} +SYSCTL_PROC(_sysctl, OID_AUTO, proc_cputype, CTLTYPE_NODE|CTLFLAG_RD, 0, 0, sysctl_sysctl_cputype ,"I","proc_cputype"); + diff --git a/bsd/kern/kern_time.c b/bsd/kern/kern_time.c index 07354b8b7..cc413684d 100644 --- a/bsd/kern/kern_time.c +++ b/bsd/kern/kern_time.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -83,14 +83,6 @@ static void setthetime( void time_zone_slock_init(void); -int gettimeofday(struct proc *p, -#ifdef __ppc__ - struct ppc_gettimeofday_args *uap, -#else - struct gettimeofday_args *uap, -#endif - register_t *retval); - /* * Time of day and interval timer support. * @@ -99,52 +91,29 @@ int gettimeofday(struct proc *p, * here provide support for adding and subtracting timeval structures * and decrementing interval timers, optionally reloading the interval * timers when they expire. - * - * XXX Y2038 bug because of clock_get_calendar_microtime() first argument */ /* ARGSUSED */ int -gettimeofday(__unused struct proc *p, -#ifdef __ppc__ - register struct ppc_gettimeofday_args *uap, -#else - register struct gettimeofday_args *uap, -#endif - __unused register_t *retval) +gettimeofday( +__unused struct proc *p, + struct gettimeofday_args *uap, + register_t *retval) { - struct timeval atv; int error = 0; struct timezone ltz; /* local copy */ -/* NOTE THIS implementation is for non ppc architectures only */ - - if (uap->tp) { - clock_get_calendar_microtime((uint32_t *)&atv.tv_sec, &atv.tv_usec); - if (IS_64BIT_PROCESS(p)) { - struct user_timeval user_atv; - user_atv.tv_sec = atv.tv_sec; - user_atv.tv_usec = atv.tv_usec; - /* - * This cast is not necessary for PPC, but is - * mostly harmless. 
- */ - error = copyout(&user_atv, CAST_USER_ADDR_T(uap->tp), sizeof(struct user_timeval)); - } else { - error = copyout(&atv, CAST_USER_ADDR_T(uap->tp), sizeof(struct timeval)); - } - if (error) - return(error); - } + if (uap->tp) + clock_gettimeofday(&retval[0], &retval[1]); if (uap->tzp) { lck_spin_lock(tz_slock); ltz = tz; lck_spin_unlock(tz_slock); - error = copyout((caddr_t)<z, CAST_USER_ADDR_T(uap->tzp), - sizeof (tz)); + + error = copyout((caddr_t)<z, CAST_USER_ADDR_T(uap->tzp), sizeof (tz)); } - return(error); + return (error); } /* @@ -605,13 +574,11 @@ time_zone_slock_init(void) { /* allocate lock group attribute and group */ tz_slock_grp_attr = lck_grp_attr_alloc_init(); - lck_grp_attr_setstat(tz_slock_grp_attr); tz_slock_grp = lck_grp_alloc_init("tzlock", tz_slock_grp_attr); /* Allocate lock attribute */ tz_slock_attr = lck_attr_alloc_init(); - //lck_attr_setdebug(tz_slock_attr); /* Allocate the spin lock */ tz_slock = lck_spin_alloc_init(tz_slock_grp, tz_slock_attr); diff --git a/bsd/kern/kpi_mbuf.c b/bsd/kern/kpi_mbuf.c index c4b070b42..b1062b566 100644 --- a/bsd/kern/kpi_mbuf.c +++ b/bsd/kern/kpi_mbuf.c @@ -672,12 +672,10 @@ mbuf_tag_id_find_internal( lck_mtx_t *new_lock = NULL; grp_attrib = lck_grp_attr_alloc_init(); - lck_grp_attr_setdefault(grp_attrib); lck_group = lck_grp_alloc_init("mbuf_tag_allocate_id", grp_attrib); lck_grp_attr_free(grp_attrib); lck_attrb = lck_attr_alloc_init(); - lck_attr_setdefault(lck_attrb); - lck_attr_setdebug(lck_attrb); + new_lock = lck_mtx_alloc_init(lck_group, lck_attrb); if (!OSCompareAndSwap((UInt32)0, (UInt32)new_lock, (UInt32*)&mtag_id_lock)) { /* diff --git a/bsd/kern/kpi_socketfilter.c b/bsd/kern/kpi_socketfilter.c index 86b3e9c60..10b73c86d 100644 --- a/bsd/kern/kpi_socketfilter.c +++ b/bsd/kern/kpi_socketfilter.c @@ -46,12 +46,9 @@ sflt_init(void) /* Allocate a spin lock */ grp_attrib = lck_grp_attr_alloc_init(); - lck_grp_attr_setdefault(grp_attrib); lck_group = lck_grp_alloc_init("socket filter lock", 
grp_attrib); lck_grp_attr_free(grp_attrib); lck_attrib = lck_attr_alloc_init(); - lck_attr_setdefault(lck_attrib); - lck_attr_setdebug(lck_attrib); sock_filter_lock = lck_mtx_alloc_init(lck_group, lck_attrib); lck_grp_free(lck_group); lck_attr_free(lck_attrib); diff --git a/bsd/kern/mach_fat.c b/bsd/kern/mach_fat.c index 408d2ecb2..7c9baaeec 100644 --- a/bsd/kern/mach_fat.c +++ b/bsd/kern/mach_fat.c @@ -38,13 +38,8 @@ #include #include #include -#include - -/* XXX should be in common header */ -extern int grade_binary(cpu_type_t exectype, cpu_subtype_t execsubtype); - -#define CPU_TYPE_NATIVE (cpu_type()) -#define CPU_TYPE_CLASSIC CPU_TYPE_POWERPC +#include +#include /********************************************************************** * Routine: fatfile_getarch2() @@ -100,7 +95,7 @@ fatfile_getarch2( * Map portion that must be accessible directly into * kernel's map. */ - nfat_arch = NXSwapBigLongToHost(header->nfat_arch); + nfat_arch = OSSwapBigToHostInt32(header->nfat_arch); end_of_archs = sizeof(struct fat_header) + nfat_arch * sizeof(struct fat_arch); @@ -134,15 +129,15 @@ fatfile_getarch2( /* * Check to see if right cpu type. */ - if(((cpu_type_t)NXSwapBigIntToHost(arch->cputype) & ~mask_bits) != req_cpu_type) + if(((cpu_type_t)OSSwapBigToHostInt32(arch->cputype) & ~mask_bits) != req_cpu_type) continue; /* * Get the grade of the cpu subtype. */ grade = grade_binary( - NXSwapBigIntToHost(arch->cputype), - NXSwapBigIntToHost(arch->cpusubtype)); + OSSwapBigToHostInt32(arch->cputype), + OSSwapBigToHostInt32(arch->cpusubtype)); /* * Remember it if it's the best we've seen. 
@@ -160,15 +155,15 @@ fatfile_getarch2( lret = LOAD_BADARCH; } else { archret->cputype = - NXSwapBigIntToHost(best_arch->cputype); + OSSwapBigToHostInt32(best_arch->cputype); archret->cpusubtype = - NXSwapBigIntToHost(best_arch->cpusubtype); + OSSwapBigToHostInt32(best_arch->cpusubtype); archret->offset = - NXSwapBigLongToHost(best_arch->offset); + OSSwapBigToHostInt32(best_arch->offset); archret->size = - NXSwapBigLongToHost(best_arch->size); + OSSwapBigToHostInt32(best_arch->size); archret->align = - NXSwapBigLongToHost(best_arch->align); + OSSwapBigToHostInt32(best_arch->align); lret = LOAD_SUCCESS; } @@ -179,8 +174,6 @@ fatfile_getarch2( return(lret); } -extern char classichandler[]; - load_return_t fatfile_getarch_affinity( struct vnode *vp, @@ -189,15 +182,15 @@ fatfile_getarch_affinity( int affinity) { load_return_t lret; - int handler = (classichandler[0] != 0); + int handler = (exec_archhandler_ppc.path[0] != 0); cpu_type_t primary_type, fallback_type; if (handler && affinity) { - primary_type = CPU_TYPE_CLASSIC; - fallback_type = CPU_TYPE_NATIVE; + primary_type = CPU_TYPE_POWERPC; + fallback_type = cpu_type(); } else { - primary_type = CPU_TYPE_NATIVE; - fallback_type = CPU_TYPE_CLASSIC; + primary_type = cpu_type(); + fallback_type = CPU_TYPE_POWERPC; } /* * Ignore the architectural bits when determining if an image @@ -231,7 +224,7 @@ fatfile_getarch( vm_offset_t data_ptr, struct fat_arch *archret) { - return fatfile_getarch2(vp, data_ptr, CPU_TYPE_NATIVE, 0, archret); + return fatfile_getarch2(vp, data_ptr, cpu_type(), 0, archret); } /********************************************************************** @@ -256,6 +249,6 @@ fatfile_getarch_with_bits( vm_offset_t data_ptr, struct fat_arch *archret) { - return fatfile_getarch2(vp, data_ptr, archbits | CPU_TYPE_NATIVE, 0, archret); + return fatfile_getarch2(vp, data_ptr, archbits | cpu_type(), 0, archret); } diff --git a/bsd/kern/mach_loader.c b/bsd/kern/mach_loader.c index a12aa7682..00dd19683 100644 --- 
a/bsd/kern/mach_loader.c +++ b/bsd/kern/mach_loader.c @@ -76,7 +76,7 @@ * XXX vm/pmap.h should not treat these prototypes as MACH_KERNEL_PRIVATE * when KERNEL is defined. */ -extern pmap_t pmap_create(vm_map_size_t size); +extern pmap_t pmap_create(vm_map_size_t size, boolean_t is_64bit); extern void pmap_switch(pmap_t); extern void pmap_map_sharedpage(task_t task, pmap_t pmap); @@ -92,7 +92,6 @@ extern kern_return_t thread_state_initialize(thread_t thread); /* XXX should have prototypes in a shared header file */ -extern int grade_binary(cpu_type_t exectype, cpu_subtype_t execsubtype); extern int get_map_nentries(vm_map_t); extern kern_return_t thread_userstack(thread_t, int, thread_state_t, unsigned int, mach_vm_offset_t *, int *); @@ -196,7 +195,8 @@ load_dylinker( thread_t thr_act, int depth, load_result_t *result, - boolean_t clean_regions + boolean_t clean_regions, + boolean_t is_64bit ); static load_return_t @@ -236,19 +236,22 @@ load_machfile( if (create_map) { old_map = current_map(); -#ifdef i386 +#ifdef NO_NESTED_PMAP pmap = get_task_pmap(current_task()); pmap_reference(pmap); -#else - pmap = pmap_create((vm_map_size_t) 0); -#endif +#else /* NO_NESTED_PMAP */ + pmap = pmap_create((vm_map_size_t) 0, (imgp->ip_flags & IMGPF_IS_64BIT)); +#endif /* NO_NESTED_PMAP */ map = vm_map_create(pmap, - get_map_min(old_map), - get_map_max(old_map), - TRUE); /**** FIXME ****/ + 0, + vm_compute_max_offset((imgp->ip_flags & IMGPF_IS_64BIT)), + TRUE); } else map = new_map; - + + if ( (header->flags & MH_ALLOW_STACK_EXECUTION) ) + vm_map_disable_NX(map); + if (!result) result = &myresult; @@ -265,6 +268,15 @@ load_machfile( return(lret); } + /* + * For 64-bit users, check for presence of a 4GB page zero + * which will enable the kernel to share the user's address space + * and hence avoid TLB flushes on kernel entry/exit + */ + if ((imgp->ip_flags & IMGPF_IS_64BIT) && + vm_map_has_4GB_pagezero(map)) + vm_map_set_4GB_pagezero(map); + /* * Commit to new map. 
First make sure that the current * users of the task get done with it, and that we clean @@ -276,13 +288,15 @@ load_machfile( * That lets us get off the pmap associated with it, and * then we can release it. */ + if (create_map) { task_halt(current_task()); old_map = swap_task_map(current_task(), map); -#ifndef i386 + vm_map_clear_4GB_pagezero(old_map); +#ifndef NO_NESTED_PMAP pmap_switch(pmap); /* Make sure we are using the new pmap */ -#endif +#endif /* !NO_NESTED_PMAP */ vm_map_deallocate(old_map); } return(LOAD_SUCCESS); @@ -622,19 +636,29 @@ parse_machfile( } } if (dlp != 0) - ret = load_dylinker(dlp, dlarchbits, map, thr_act, depth, result, clean_regions); + ret = load_dylinker(dlp, dlarchbits, map, thr_act, depth, result, clean_regions, abi64); if(depth == 1) { if (result->thread_count == 0) ret = LOAD_FAILURE; -#ifdef __ppc__ else if ( abi64 ) { /* Map in 64-bit commpage */ /* LP64todo - make this clean */ pmap_map_sharedpage(current_task(), get_map_pmap(map)); vm_map_commpage64(map); + } else { +#ifdef __i386__ + /* + * On Intel, the comm page doesn't get mapped + * automatically because it goes beyond the current end + * of the VM map in the current 3GB/1GB address space + * model. + * XXX This will probably become unnecessary when we + * switch to the 4GB/4GB address space model. + */ + vm_map_commpage32(map); +#endif /* __i386__ */ } -#endif } } @@ -647,6 +671,62 @@ parse_machfile( return(ret); } +#ifndef SG_PROTECTED_VERSION_1 +#define SG_PROTECTED_VERSION_1 0x8 +#endif /* SG_PROTECTED_VERSION_1 */ + +#ifdef __i386__ + +#define APPLE_UNPROTECTED_HEADER_SIZE (3 * PAGE_SIZE_64) + +static load_return_t +unprotect_segment_64( + uint64_t file_off, + uint64_t file_size, + vm_map_t map, + vm_map_offset_t map_addr, + vm_map_size_t map_size) +{ + kern_return_t kr; + + /* + * The first APPLE_UNPROTECTED_HEADER_SIZE bytes (from offset 0 of + * this part of a Universal binary) are not protected... + * The rest needs to be "transformed". 
+ */ + if (file_off <= APPLE_UNPROTECTED_HEADER_SIZE && + file_off + file_size <= APPLE_UNPROTECTED_HEADER_SIZE) { + /* it's all unprotected, nothing to do... */ + kr = KERN_SUCCESS; + } else { + if (file_off <= APPLE_UNPROTECTED_HEADER_SIZE) { + /* + * We start mapping in the unprotected area. + * Skip the unprotected part... + */ + vm_map_offset_t delta; + + delta = APPLE_UNPROTECTED_HEADER_SIZE; + delta -= file_off; + map_addr += delta; + map_size -= delta; + } + /* ... transform the rest of the mapping. */ + kr = vm_map_apple_protected(map, + map_addr, + map_addr + map_size); + } + + if (kr != KERN_SUCCESS) { + return LOAD_FAILURE; + } + return LOAD_SUCCESS; +} +#else /* __i386__ */ +#define unprotect_segment_64(file_off, file_size, map, map_addr, map_size) \ + LOAD_SUCCESS +#endif /* __i386__ */ + static load_return_t load_segment( @@ -682,6 +762,27 @@ load_segment( map_size = round_page(scp->filesize); map_addr = trunc_page(scp->vmaddr); +#if 0 /* XXX (4596982) this interferes with Rosetta */ + if (map_addr == 0 && + map_size == 0 && + seg_size != 0 && + (scp->initprot & VM_PROT_ALL) == VM_PROT_NONE && + (scp->maxprot & VM_PROT_ALL) == VM_PROT_NONE) { + /* + * This is a "page zero" segment: it starts at address 0, + * is not mapped from the binary file and is not accessible. + * User-space should never be able to access that memory, so + * make it completely off limits by raising the VM map's + * minimum offset. 
+ */ + ret = vm_map_raise_min_offset(map, (vm_map_offset_t) seg_size); + if (ret != KERN_SUCCESS) { + return LOAD_FAILURE; + } + return LOAD_SUCCESS; + } +#endif + map_offset = pager_offset + scp->fileoff; if (map_size > 0) { @@ -732,28 +833,28 @@ load_segment( if (delta_size > 0) { vm_offset_t tmp = map_addr + map_size; - ret = vm_allocate(map, &tmp, delta_size, VM_FLAGS_FIXED); + ret = vm_map(map, &tmp, delta_size, 0, VM_FLAGS_FIXED, + NULL, 0, FALSE, + scp->initprot, scp->maxprot, + VM_INHERIT_DEFAULT); if (ret != KERN_SUCCESS) return(LOAD_NOSPACE); } - /* - * Set protection values. (Note: ignore errors!) - */ - - if (scp->maxprot != VM_PROT_DEFAULT) { - (void) vm_protect(map, - map_addr, seg_size, - TRUE, scp->maxprot); - } - if (scp->initprot != VM_PROT_DEFAULT) { - (void) vm_protect(map, - map_addr, seg_size, - FALSE, scp->initprot); - } if ( (scp->fileoff == 0) && (scp->filesize != 0) ) result->mach_header = map_addr; - return(LOAD_SUCCESS); + + if (scp->flags & SG_PROTECTED_VERSION_1) { + ret = unprotect_segment_64((uint64_t) scp->fileoff, + (uint64_t) scp->filesize, + map, + (vm_map_offset_t) map_addr, + (vm_map_size_t) map_size); + } else { + ret = LOAD_SUCCESS; + } + + return ret; } static @@ -791,6 +892,25 @@ load_segment_64( map_size = round_page_64(scp64->filesize); /* limited to 32 bits */ map_addr = round_page_64(scp64->vmaddr); + if (map_addr == 0 && + map_size == 0 && + seg_size != 0 && + (scp64->initprot & VM_PROT_ALL) == VM_PROT_NONE && + (scp64->maxprot & VM_PROT_ALL) == VM_PROT_NONE) { + /* + * This is a "page zero" segment: it starts at address 0, + * is not mapped from the binary file and is not accessible. + * User-space should never be able to access that memory, so + * make it completely off limits by raising the VM map's + * minimum offset. 
+ */ + ret = vm_map_raise_min_offset(map, seg_size); + if (ret != KERN_SUCCESS) { + return LOAD_FAILURE; + } + return LOAD_SUCCESS; + } + map_offset = pager_offset + scp64->fileoff; /* limited to 32 bits */ if (map_size > 0) { @@ -841,28 +961,28 @@ load_segment_64( if (delta_size > 0) { mach_vm_offset_t tmp = map_addr + map_size; - ret = mach_vm_allocate(map, &tmp, delta_size, VM_FLAGS_FIXED); + ret = mach_vm_map(map, &tmp, delta_size, 0, VM_FLAGS_FIXED, + NULL, 0, FALSE, + scp64->initprot, scp64->maxprot, + VM_INHERIT_DEFAULT); if (ret != KERN_SUCCESS) return(LOAD_NOSPACE); } - /* - * Set protection values. (Note: ignore errors!) - */ - - if (scp64->maxprot != VM_PROT_DEFAULT) { - (void) mach_vm_protect(map, - map_addr, seg_size, - TRUE, scp64->maxprot); - } - if (scp64->initprot != VM_PROT_DEFAULT) { - (void) mach_vm_protect(map, - map_addr, seg_size, - FALSE, scp64->initprot); - } if ( (scp64->fileoff == 0) && (scp64->filesize != 0) ) result->mach_header = map_addr; - return(LOAD_SUCCESS); + + if (scp64->flags & SG_PROTECTED_VERSION_1) { + ret = unprotect_segment_64(scp64->fileoff, + scp64->filesize, + map, + map_addr, + map_size); + } else { + ret = LOAD_SUCCESS; + } + + return ret; } static @@ -1104,7 +1224,8 @@ load_dylinker( thread_t thr_act, int depth, load_result_t *result, - boolean_t clean_regions + boolean_t clean_regions, + boolean_t is_64bit ) { char *name; @@ -1138,7 +1259,8 @@ load_dylinker( * Load the Mach-O. * Use a temporary map to do the work. */ - copy_map = vm_map_create(pmap_create(vm_map_round_page(macho_size)), + copy_map = vm_map_create(pmap_create(vm_map_round_page(macho_size), + is_64bit), get_map_min(map), get_map_max(map), TRUE); if (VM_MAP_NULL == copy_map) { ret = LOAD_RESOURCE; diff --git a/bsd/kern/mach_process.c b/bsd/kern/mach_process.c index caa043027..e07e0300a 100644 --- a/bsd/kern/mach_process.c +++ b/bsd/kern/mach_process.c @@ -81,7 +81,6 @@ #include #include -#include /* Macros to clear/set/test flags. 
*/ @@ -90,12 +89,8 @@ #define ISSET(t, f) ((t) & (f)) extern thread_t port_name_to_thread(mach_port_name_t port_name); -extern kern_return_t thread_getstatus(thread_t thread, int flavor, thread_state_t tstate, mach_msg_type_number_t *count); extern thread_t get_firstthread(task_t); -#if defined (ppc) -extern kern_return_t thread_setstatus(thread_t thread, int flavor, thread_state_t tstate, mach_msg_type_number_t count); -#endif /* * sys-trace system call. @@ -111,15 +106,6 @@ ptrace(p, uap, retval) task_t task; thread_t th_act; struct uthread *ut; - int *locr0; -#if defined(ppc) - struct ppc_thread_state64 statep; -#elif defined(i386) - struct i386_saved_state statep; -#else -#error architecture not supported -#endif - unsigned long state_count; int tr_sigexc = 0; AUDIT_ARG(cmd, uap->req); @@ -267,39 +253,16 @@ ptrace(p, uap, retval) th_act = (thread_t)get_firstthread(task); if (th_act == THREAD_NULL) goto errorLabel; - ut = (uthread_t)get_bsdthread_info(th_act); - locr0 = ut->uu_ar0; -#if defined(i386) - state_count = i386_NEW_THREAD_STATE_COUNT; - if (thread_getstatus(th_act, i386_NEW_THREAD_STATE, &statep, &state_count) != KERN_SUCCESS) { - goto errorLabel; - } -#elif defined(ppc) - state_count = PPC_THREAD_STATE64_COUNT; - if (thread_getstatus(th_act, PPC_THREAD_STATE64, (thread_state_t)&statep, (mach_msg_type_number_t *)&state_count) != KERN_SUCCESS) { - goto errorLabel; - } -#else -#error architecture not supported -#endif + if (uap->addr != (user_addr_t)1) { -#if defined(i386) - locr0[PC] = (int)uap->addr; -#elif defined(ppc) +#if defined(ppc) #define ALIGNED(addr,size) (((unsigned)(addr)&((size)-1))==0) - if (!ALIGNED((int)uap->addr, sizeof(int))) - return (ERESTART); - - statep.srr0 = uap->addr; - state_count = PPC_THREAD_STATE64_COUNT; - if (thread_setstatus(th_act, PPC_THREAD_STATE64, (thread_state_t)&statep, state_count) != KERN_SUCCESS) { - goto errorLabel; - } + if (!ALIGNED((int)uap->addr, sizeof(int))) + return (ERESTART); #undef ALIGNED -#else 
-#error architecture not implemented! #endif - } /* uap->addr != (user_addr_t)1 */ + thread_setentrypoint(th_act, uap->addr); + } if ((unsigned)uap->data >= NSIG) goto errorLabel; @@ -307,37 +270,18 @@ ptrace(p, uap, retval) if (uap->data != 0) { psignal_lock(t, uap->data, 0); } -#if defined(ppc) - state_count = PPC_THREAD_STATE64_COUNT; - if (thread_getstatus(th_act, PPC_THREAD_STATE64, (thread_state_t)&statep, (mach_msg_type_number_t *)&state_count) != KERN_SUCCESS) { - goto errorLabel; - } -#endif - -#define MSR_SE_BIT 21 if (uap->req == PT_STEP) { -#if defined(i386) - locr0[PS] |= PSL_T; -#elif defined(ppc) - statep.srr1 |= MASK(MSR_SE); -#else -#error architecture not implemented! -#endif - } /* uap->req == PT_STEP */ - else { /* PT_CONTINUE - clear trace bit if set */ -#if defined(i386) - locr0[PS] &= ~PSL_T; -#elif defined(ppc) - statep.srr1 &= ~MASK(MSR_SE); -#endif + /* + * set trace bit + */ + thread_setsinglestep(th_act, 1); + } else { + /* + * clear trace bit if on + */ + thread_setsinglestep(th_act, 0); } -#if defined (ppc) - state_count = PPC_THREAD_STATE64_COUNT; - if (thread_setstatus(th_act, PPC_THREAD_STATE64, (thread_state_t)&statep, state_count) != KERN_SUCCESS) { - goto errorLabel; - } -#endif resume: t->p_xstat = uap->data; t->p_stat = SRUN; diff --git a/bsd/kern/makesyscalls.sh b/bsd/kern/makesyscalls.sh index b86904d78..e2e5a7f95 100755 --- a/bsd/kern/makesyscalls.sh +++ b/bsd/kern/makesyscalls.sh @@ -138,13 +138,8 @@ s/\$//g printf "#include \n" > sysarg printf "\n#ifdef KERNEL\n" > sysarg printf "#ifdef __APPLE_API_PRIVATE\n" > sysarg - printf "#ifdef __ppc__\n" > sysarg printf "#define\tPAD_(t)\t(sizeof(uint64_t) <= sizeof(t) \\\n " > sysarg printf "\t\t? 0 : sizeof(uint64_t) - sizeof(t))\n" > sysarg - printf "#else\n" > sysarg - printf "#define\tPAD_(t)\t(sizeof(register_t) <= sizeof(t) \\\n " > sysarg - printf "\t\t? 
0 : sizeof(register_t) - sizeof(t))\n" > sysarg - printf "#endif\n" > sysarg printf "#if BYTE_ORDER == LITTLE_ENDIAN\n"> sysarg printf "#define\tPADL_(t)\t0\n" > sysarg printf "#define\tPADR_(t)\tPAD_(t)\n" > sysarg @@ -155,7 +150,6 @@ s/\$//g printf "\n__BEGIN_DECLS\n" > sysarg printf "#ifndef __MUNGE_ONCE\n" > sysarg printf "#define __MUNGE_ONCE\n" > sysarg - printf "#ifdef __ppc__\n" > sysarg printf "void munge_w(const void *, void *); \n" > sysarg printf "void munge_ww(const void *, void *); \n" > sysarg printf "void munge_www(const void *, void *); \n" > sysarg @@ -164,31 +158,25 @@ s/\$//g printf "void munge_wwwwww(const void *, void *); \n" > sysarg printf "void munge_wwwwwww(const void *, void *); \n" > sysarg printf "void munge_wwwwwwww(const void *, void *); \n" > sysarg - printf "void munge_d(const void *, void *); \n" > sysarg - printf "void munge_dd(const void *, void *); \n" > sysarg - printf "void munge_ddd(const void *, void *); \n" > sysarg - printf "void munge_dddd(const void *, void *); \n" > sysarg - printf "void munge_ddddd(const void *, void *); \n" > sysarg - printf "void munge_dddddd(const void *, void *); \n" > sysarg - printf "void munge_ddddddd(const void *, void *); \n" > sysarg - printf "void munge_dddddddd(const void *, void *); \n" > sysarg printf "void munge_wl(const void *, void *); \n" > sysarg printf "void munge_wlw(const void *, void *); \n" > sysarg printf "void munge_wwwl(const void *, void *); \n" > sysarg + printf "void munge_wwwlww(const void *, void *); \n" > sysarg printf "void munge_wwwwl(const void *, void *); \n" > sysarg printf "void munge_wwwwwl(const void *, void *); \n" > sysarg printf "void munge_wsw(const void *, void *); \n" > sysarg printf "void munge_wws(const void *, void *); \n" > sysarg printf "void munge_wwwsw(const void *, void *); \n" > sysarg + printf "#ifdef __ppc__\n" > sysarg + printf "void munge_d(const void *, void *); \n" > sysarg + printf "void munge_dd(const void *, void *); \n" > sysarg + printf 
"void munge_ddd(const void *, void *); \n" > sysarg + printf "void munge_dddd(const void *, void *); \n" > sysarg + printf "void munge_ddddd(const void *, void *); \n" > sysarg + printf "void munge_dddddd(const void *, void *); \n" > sysarg + printf "void munge_ddddddd(const void *, void *); \n" > sysarg + printf "void munge_dddddddd(const void *, void *); \n" > sysarg printf "#else \n" > sysarg - printf "#define munge_w NULL \n" > sysarg - printf "#define munge_ww NULL \n" > sysarg - printf "#define munge_www NULL \n" > sysarg - printf "#define munge_wwww NULL \n" > sysarg - printf "#define munge_wwwww NULL \n" > sysarg - printf "#define munge_wwwwww NULL \n" > sysarg - printf "#define munge_wwwwwww NULL \n" > sysarg - printf "#define munge_wwwwwwww NULL \n" > sysarg printf "#define munge_d NULL \n" > sysarg printf "#define munge_dd NULL \n" > sysarg printf "#define munge_ddd NULL \n" > sysarg @@ -197,14 +185,6 @@ s/\$//g printf "#define munge_dddddd NULL \n" > sysarg printf "#define munge_ddddddd NULL \n" > sysarg printf "#define munge_dddddddd NULL \n" > sysarg - printf "#define munge_wl NULL \n" > sysarg - printf "#define munge_wlw NULL \n" > sysarg - printf "#define munge_wwwl NULL \n" > sysarg - printf "#define munge_wwwwl NULL \n" > sysarg - printf "#define munge_wwwwwl NULL \n" > sysarg - printf "#define munge_wsw NULL \n" > sysarg - printf "#define munge_wws NULL \n" > sysarg - printf "#define munge_wwwsw NULL \n" > sysarg printf "#endif // __ppc__\n" > sysarg printf "#endif /* !__MUNGE_ONCE */\n" > sysarg @@ -663,11 +643,7 @@ s/\$//g } END { - printf "#ifdef __ppc__\n" > sysinc printf "#define AC(name) (sizeof(struct name) / sizeof(uint64_t))\n" > sysinc - printf "#else\n" > sysinc - printf "#define AC(name) (sizeof(struct name) / sizeof(register_t))\n" > sysinc - printf "#endif\n" > sysinc printf "\n" > sysinc printf("\n__END_DECLS\n") > sysprotoend diff --git a/bsd/kern/md5c.c b/bsd/kern/md5c.c index 80fce2d7a..616779655 100644 --- a/bsd/kern/md5c.c +++ 
b/bsd/kern/md5c.c @@ -22,7 +22,7 @@ * These notices must be retained in any copies of any part of this * documentation and/or software. * - * $Id: md5c.c,v 1.2 2000/09/14 20:34:44 lindak Exp $ + * $Id: md5c.c,v 1.2.4880.1 2005/06/24 01:47:07 lindak Exp $ * * This code is the same as the code published by RSA Inc. It has been * edited for clarity and style only. diff --git a/bsd/kern/posix_sem.c b/bsd/kern/posix_sem.c index cf17502e0..2b91c34bc 100644 --- a/bsd/kern/posix_sem.c +++ b/bsd/kern/posix_sem.c @@ -52,6 +52,7 @@ #include #include #include +#include #include @@ -181,12 +182,10 @@ psem_lock_init( void ) { psx_sem_subsys_lck_grp_attr = lck_grp_attr_alloc_init(); - lck_grp_attr_setstat(psx_sem_subsys_lck_grp_attr); psx_sem_subsys_lck_grp = lck_grp_alloc_init("posix shared memory", psx_sem_subsys_lck_grp_attr); psx_sem_subsys_lck_attr = lck_attr_alloc_init(); - /* lck_attr_setdebug(psx_sem_subsys_lck_attr); */ lck_mtx_init(& psx_sem_subsys_mutex, psx_sem_subsys_lck_grp, psx_sem_subsys_lck_attr); } @@ -469,6 +468,8 @@ sem_open(struct proc *p, struct sem_open_args *uap, user_addr_t *retval) pinfo->psem_mode = cmode; pinfo->psem_uid = kauth_cred_getuid(kauth_cred_get()); pinfo->psem_gid = kauth_cred_get()->cr_gid; + bcopy(pnbuf, &pinfo->psem_name[0], PSEMNAMLEN); + pinfo->psem_name[PSEMNAMLEN]= 0; PSEM_SUBSYS_UNLOCK(); kret = semaphore_create(kernel_task, &pinfo->psem_semobject, SYNC_POLICY_FIFO, value); @@ -1027,3 +1028,35 @@ psem_kqfilter(__unused struct fileproc *fp, __unused struct knote *kn, return (ENOTSUP); } +int +fill_pseminfo(struct psemnode *pnode, struct psem_info * info) +{ + register struct pseminfo *pinfo; + struct stat *sb; + + PSEM_SUBSYS_LOCK(); + if ((pinfo = pnode->pinfo) == PSEMINFO_NULL){ + PSEM_SUBSYS_UNLOCK(); + return(EINVAL); + } + +#if 0 + if ((pinfo->psem_flags & PSEM_ALLOCATED) != PSEM_ALLOCATED) { + PSEM_SUBSYS_UNLOCK(); + return(EINVAL); + } +#endif + + sb = &info->psem_stat; + bzero(sb, sizeof(struct stat)); + + sb->st_mode = 
pinfo->psem_mode; + sb->st_uid = pinfo->psem_uid; + sb->st_gid = pinfo->psem_gid; + sb->st_size = pinfo->psem_usecount; + bcopy(&pinfo->psem_name[0], &info->psem_name[0], PSEMNAMLEN+1); + + PSEM_SUBSYS_UNLOCK(); + return(0); +} + diff --git a/bsd/kern/posix_shm.c b/bsd/kern/posix_shm.c index f44264c37..9fe9b7785 100644 --- a/bsd/kern/posix_shm.c +++ b/bsd/kern/posix_shm.c @@ -53,6 +53,7 @@ #include #include #include +#include #include @@ -81,6 +82,7 @@ #define f_data f_fglob->fg_data #define PSHMNAMLEN 31 /* maximum name segment length we bother with */ + struct pshminfo { unsigned int pshm_flags; unsigned int pshm_usecount; @@ -190,12 +192,10 @@ pshm_lock_init( void ) { psx_shm_subsys_lck_grp_attr = lck_grp_attr_alloc_init(); - lck_grp_attr_setstat(psx_shm_subsys_lck_grp_attr); psx_shm_subsys_lck_grp = lck_grp_alloc_init("posix shared memory", psx_shm_subsys_lck_grp_attr); psx_shm_subsys_lck_attr = lck_attr_alloc_init(); - /* lck_attr_setdebug(psx_shm_subsys_lck_attr); */ lck_mtx_init(& psx_shm_subsys_mutex, psx_shm_subsys_lck_grp, psx_shm_subsys_lck_attr); } @@ -479,6 +479,8 @@ shm_open(struct proc *p, struct shm_open_args *uap, register_t *retval) pinfo->pshm_mode = cmode; pinfo->pshm_uid = kauth_cred_getuid(kauth_cred_get()); pinfo->pshm_gid = kauth_cred_get()->cr_gid; + bcopy(pnbuf, &pinfo->pshm_name[0], PSHMNAMLEN); + pinfo->pshm_name[PSHMNAMLEN]=0; } else { /* already exists */ if( pinfo->pshm_flags & PSHM_INDELETE) { @@ -587,9 +589,9 @@ pshm_truncate(__unused struct proc *p, struct fileproc *fp, __unused int fd, struct pshminfo * pinfo; struct pshmnode * pnode ; kern_return_t kret; - vm_offset_t user_addr; + mach_vm_offset_t user_addr; mem_entry_name_port_t mem_object; - vm_size_t size; + mach_vm_size_t size; if (fp->f_type != DTYPE_PSXSHM) { return(EINVAL); @@ -612,17 +614,17 @@ pshm_truncate(__unused struct proc *p, struct fileproc *fp, __unused int fd, PSHM_SUBSYS_UNLOCK(); size = round_page_64(length); - kret = vm_allocate(current_map(), &user_addr, 
size, VM_FLAGS_ANYWHERE); + kret = mach_vm_allocate(current_map(), &user_addr, size, VM_FLAGS_ANYWHERE); if (kret != KERN_SUCCESS) goto out; - kret = mach_make_memory_entry (current_map(), &size, + kret = mach_make_memory_entry_64 (current_map(), &size, user_addr, VM_PROT_DEFAULT, &mem_object, 0); if (kret != KERN_SUCCESS) goto out; - vm_deallocate(current_map(), user_addr, size); + mach_vm_deallocate(current_map(), user_addr, size); PSHM_SUBSYS_LOCK(); pinfo->pshm_flags &= ~PSHM_DEFINED; @@ -1016,3 +1018,32 @@ pshm_kqfilter(__unused struct fileproc *fp, __unused struct knote *kn, { return(ENOTSUP); } + +int +fill_pshminfo(struct pshmnode * pshm, struct pshm_info * info) +{ + struct pshminfo *pinfo; + struct stat *sb; + + PSHM_SUBSYS_LOCK(); + if ((pinfo = pshm->pinfo) == PSHMINFO_NULL){ + PSHM_SUBSYS_UNLOCK(); + return(EINVAL); + } + + sb = &info->pshm_stat; + + bzero(sb, sizeof(struct stat)); + sb->st_mode = pinfo->pshm_mode; + sb->st_uid = pinfo->pshm_uid; + sb->st_gid = pinfo->pshm_gid; + sb->st_size = pinfo->pshm_length; + + info->pshm_mappaddr = pshm->mapp_addr; + bcopy(&pinfo->pshm_name[0], &info->pshm_name[0], PSHMNAMLEN+1); + + PSHM_SUBSYS_UNLOCK(); + return(0); +} + + diff --git a/bsd/kern/proc_info.c b/bsd/kern/proc_info.c new file mode 100644 index 000000000..621886c82 --- /dev/null +++ b/bsd/kern/proc_info.c @@ -0,0 +1,1058 @@ +/* + * Copyright (c) 2005 Apple Computer, Inc. All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * The contents of this file constitute Original Code as defined in and + * are subject to the Apple Public Source License Version 1.1 (the + * "License"). You may not use this file except in compliance with the + * License. Please obtain a copy of the License at + * http://www.apple.com/publicsource and read it before using this file. 
+ * + * This Original Code and all software distributed under the License are + * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the + * License for the specific language governing rights and limitations + * under the License. + * + * @APPLE_LICENSE_HEADER_END@ + */ + +/* + * sysctl system call. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + + +#include + +#include + +struct pshmnode; +struct psemnode; +struct pipe; +struct kqueue; +struct atalk; + +int proc_info_internal(int callnum, int pid, int flavor, uint64_t arg, user_addr_t buffer, uint32_t buffersize, register_t * retval); + +/* protos for proc_info calls */ +int proc_listpids(uint32_t type, uint32_t tyoneinfo, user_addr_t buffer, uint32_t buffersize, register_t * retval); +int proc_pidinfo(int pid, int flavor, uint64_t arg, user_addr_t buffer, uint32_t buffersize, register_t * retval); +int proc_pidfdinfo(int pid, int flavor,int fd, user_addr_t buffer, uint32_t buffersize, register_t * retval); +int proc_kernmsgbuf(user_addr_t buffer, uint32_t buffersize, register_t * retval); + +/* protos for procpidinfo calls */ +int proc_pidfdlist(proc_t p, user_addr_t buffer, uint32_t buffersize, register_t *retval); +int proc_pidbsdinfo(proc_t p, struct proc_bsdinfo *pbsd); +int proc_pidtaskinfo(proc_t p, struct proc_taskinfo *ptinfo); +int proc_pidallinfo(proc_t p, int flavor, uint64_t arg, user_addr_t buffer, uint32_t buffersize, register_t *retval); +int 
proc_pidthreadinfo(proc_t p, uint64_t arg, struct proc_threadinfo *pthinfo); +int proc_pidlistthreads(proc_t p, user_addr_t buffer, uint32_t buffersize, register_t *retval); +int proc_pidregioninfo(proc_t p, uint64_t arg, user_addr_t buffer, uint32_t buffersize, register_t *retval); +int proc_pidregionpathinfo(proc_t p, uint64_t arg, user_addr_t buffer, uint32_t buffersize, register_t *retval); +int proc_pidvnodepathinfo(proc_t p, uint64_t arg, user_addr_t buffer, uint32_t buffersize, register_t *retval); + + +/* protos for proc_pidfdinfo calls */ +int pid_vnodeinfo(vnode_t vp, uint32_t vid, struct fileproc * fp, user_addr_t buffer, uint32_t buffersize, register_t * retval); +int pid_vnodeinfopath(vnode_t vp, uint32_t vid, struct fileproc * fp, user_addr_t buffer, uint32_t buffersize, register_t * retval); +int pid_socketinfo(socket_t so, struct fileproc *fp, user_addr_t buffer, uint32_t buffersize, register_t * retval); +int pid_pseminfo(struct psemnode * psem, struct fileproc * fp, user_addr_t buffer, uint32_t buffersize, register_t * retval); +int pid_pshminfo(struct pshmnode * pshm, struct fileproc * fp, user_addr_t buffer, uint32_t buffersize, register_t * retval); +int pid_pipeinfo(struct pipe * p, struct fileproc * fp, user_addr_t buffer, uint32_t buffersize, register_t * retval); +int pid_kqueueinfo(struct kqueue * kq, struct fileproc * fp, user_addr_t buffer, uint32_t buffersize, register_t * retval); +int pid_atalkinfo(struct atalk * at, struct fileproc * fp, user_addr_t buffer, uint32_t buffersize, register_t * retval); + + +/* protos for misc */ + +int fill_vnodeinfo(vnode_t vp, struct vnode_info *vinfo); +void fill_fileinfo(struct fileproc * fp, struct proc_fileinfo * finfo); +static int proc_security_policy(proc_t p); + + +/***************************** proc_info ********************/ + +int +proc_info(__unused struct proc *p, struct proc_info_args * uap, register_t *retval) +{ + return(proc_info_internal(uap->callnum, uap->pid, uap->flavor, uap->arg, 
uap->buffer, uap->buffersize, retval)); +} + + +int +proc_info_internal(int callnum, int pid, int flavor, uint64_t arg, user_addr_t buffer, uint32_t buffersize, register_t * retval) +{ + + switch(callnum) { + case 1: /* proc_listpids */ + /* pid contains type and flavor contains typeinfo */ + return(proc_listpids(pid, flavor, buffer, buffersize, retval)); + case 2: /* proc_pidinfo */ + return(proc_pidinfo(pid, flavor, arg, buffer, buffersize, retval)); + case 3: /* proc_pidfdinfo */ + return(proc_pidfdinfo(pid, flavor, (int)arg, buffer, buffersize, retval)); + case 4: /* proc_kernmsgbuf */ + return(proc_kernmsgbuf( buffer, buffersize, retval)); + default: + return(EINVAL); + } + + return(EINVAL); +} + +/******************* proc_listpids routine ****************/ +int +proc_listpids(uint32_t type, uint32_t typeinfo, user_addr_t buffer, uint32_t buffersize, register_t * retval) +{ + boolean_t funnel_state; + int numprocs, wantpids; + char * kbuf; + int * ptr; + int n, skip; + struct proc * p; + int error = 0; + + /* if the buffer is null, return num of procs */ + if (buffer == (user_addr_t)0) { + *retval = ((nprocs+20) * sizeof(int)); + return(0); + } + + if (buffersize < sizeof(int)) { + return(ENOMEM); + } + wantpids = buffersize/sizeof(int); + numprocs = nprocs+20; + if (numprocs > wantpids) + numprocs = wantpids; + + kbuf = (char *)kalloc((vm_size_t)(numprocs * sizeof(int))); + bzero(kbuf, sizeof(int)); + + funnel_state = thread_funnel_set(kernel_flock, TRUE); + + n = 0; + ptr = (int *)kbuf; + LIST_FOREACH(p, &allproc, p_list) { + skip = 0; + switch (type) { + case PROC_PGRP_ONLY: + if (p->p_pgrp->pg_id != (pid_t)typeinfo) + skip = 1; + break; + case PROC_ALL_PIDS: + skip = 0; + break; + case PROC_TTY_ONLY: + if ((p->p_flag & P_CONTROLT) == 0 || + (p->p_session == NULL) || + p->p_session->s_ttyp == NULL || + p->p_session->s_ttyp->t_dev != (dev_t)typeinfo) + skip = 1; + break; + case PROC_UID_ONLY: + if ((p->p_ucred == NULL) || + (kauth_cred_getuid(p->p_ucred) != 
(uid_t)typeinfo)) + skip = 1; + break; + case PROC_RUID_ONLY: + if ((p->p_ucred == NULL) || + (p->p_ucred->cr_ruid != (uid_t)typeinfo)) + break; + default: + skip = 1; + break; + }; + + /* Do we have permission to look into this ? */ + if (proc_security_policy(p) != 0) { + skip = 1; + } + + if(skip == 0) { + *ptr++ = p->p_pid; + n++; + } + if (n >= numprocs) + break; + } + + if (n < numprocs) { + LIST_FOREACH(p, &zombproc, p_list) { + *ptr++ = p->p_pid; + n++; + if (n >= numprocs) + break; + } + } + + thread_funnel_set(kernel_flock, funnel_state); + + ptr = (int *)kbuf; + error = copyout((caddr_t)ptr, buffer, n * sizeof(int)); + if (error == 0) + *retval = (n * sizeof(int)); + kfree((void *)kbuf, (vm_size_t)(numprocs * sizeof(int))); + + return(error); +} + + +/********************************** proc_pidinfo routines ********************************/ + +int +proc_pidfdlist(proc_t p, user_addr_t buffer, uint32_t buffersize, register_t *retval) +{ + int numfds, needfds; + char * kbuf; + struct proc_fdinfo * pfd; + struct fileproc * fp; + int n; + int count = 0; + int error = 0; + + numfds = p->p_fd->fd_nfiles; + + if (buffer == (user_addr_t) 0) { + numfds += 20; + *retval = (numfds * sizeof(struct proc_fdinfo)); + return(0); + } + + /* buffersize is big enough atleast for one struct */ + needfds = buffersize/sizeof(struct proc_fdinfo); + + if (numfds > needfds) + numfds = needfds; + + kbuf = (char *)kalloc((vm_size_t)(numfds * sizeof(struct proc_fdinfo))); + bzero(kbuf, numfds * sizeof(struct proc_fdinfo)); + + proc_fdlock(p); + + pfd = (struct proc_fdinfo *)kbuf; + + for (n = 0; ((n < numfds) && (n < p->p_fd->fd_nfiles)); n++) { + if (((fp = p->p_fd->fd_ofiles[n]) != 0) + && ((p->p_fd->fd_ofileflags[n] & UF_RESERVED) == 0)) { + pfd->proc_fd = n; + pfd->proc_fdtype = fp->f_fglob->fg_type; + count++; + pfd++; + } + } + proc_fdunlock(p); + + error = copyout(kbuf, buffer, count * sizeof(struct proc_fdinfo)); + kfree((void *)kbuf, (vm_size_t)(numfds * sizeof(struct 
proc_fdinfo))); + if (error == 0) + *retval = (count * sizeof(struct proc_fdinfo)); + return(error); +} + + +int +proc_pidbsdinfo(proc_t p, struct proc_bsdinfo * pbsd) +{ + register struct tty *tp; + struct session *sessionp = NULL; + + bzero(pbsd, sizeof(struct proc_bsdinfo)); + pbsd->pbi_status = p->p_stat; + pbsd->pbi_xstatus = p->p_xstat; + pbsd->pbi_pid = p->p_pid; + pbsd->pbi_ppid = p->p_pptr->p_pid; + pbsd->pbi_uid = p->p_ucred->cr_uid; + pbsd->pbi_gid = p->p_ucred->cr_uid; /* XXX */ + pbsd->pbi_ruid = p->p_ucred->cr_ruid; + pbsd->pbi_rgid = p->p_ucred->cr_rgid; + pbsd->pbi_svuid = p->p_ucred->cr_svuid; + pbsd->pbi_svgid = p->p_ucred->cr_svgid; + + pbsd->pbi_nice = p->p_nice; + if (p->p_stats) + pbsd->pbi_start = p->p_stats->p_start; + bcopy(&p->p_comm, &pbsd->pbi_comm[0], MAXCOMLEN); + bcopy(&p->p_name, &pbsd->pbi_name[0], 2* MAXCOMLEN); + + pbsd->pbi_flags = 0; + if ((p->p_flag & P_SYSTEM) == P_SYSTEM) + pbsd->pbi_flags |= PROC_FLAG_SYSTEM; + if ((p->p_flag & P_TRACED) == P_TRACED) + pbsd->pbi_flags |= PROC_FLAG_TRACED; + if ((p->p_flag & P_WEXIT) == P_WEXIT) + pbsd->pbi_flags |= PROC_FLAG_INEXIT; + if ((p->p_flag & P_PPWAIT) == P_PPWAIT) + pbsd->pbi_flags |= PROC_FLAG_PPWAIT; + if ((p->p_flag & P_LP64) == P_LP64) + pbsd->pbi_flags |= PROC_FLAG_LP64; + if ((p->p_flag & P_CONTROLT) == P_CONTROLT) + pbsd->pbi_flags |= PROC_FLAG_CONTROLT; + + if (SESS_LEADER(p)) + pbsd->pbi_flags |= PROC_FLAG_SLEADER; + if (p->p_pgrp->pg_session && p->p_pgrp->pg_session->s_ttyvp) + pbsd->pbi_flags |= PROC_FLAG_CTTY; + + pbsd->pbi_nfiles = p->p_fd->fd_nfiles; + if (p->p_pgrp) { + sessionp = p->p_pgrp->pg_session; + pbsd->pbi_pgid = p->p_pgrp->pg_id; + pbsd->pbi_pjobc = p->p_pgrp->pg_jobc; + if ((p->p_flag & P_CONTROLT) && (sessionp) && (tp = sessionp->s_ttyp)) { + pbsd->e_tdev = tp->t_dev; + pbsd->e_tpgid = tp->t_pgrp ? 
tp->t_pgrp->pg_id : 0; + } + } + return(0); +} + + +int +proc_pidtaskinfo(proc_t p, struct proc_taskinfo * ptinfo) +{ + task_t task; + + task = p->task; + + bzero(ptinfo, sizeof(struct proc_taskinfo)); + fill_taskprocinfo(task, (struct proc_taskinfo_internal *)ptinfo); + + return(0); +} + + + +int +proc_pidthreadinfo(proc_t p, uint64_t arg, struct proc_threadinfo *pthinfo) +{ + int error = 0; + uint64_t threadaddr = (uint64_t)arg; + + bzero(pthinfo, sizeof(struct proc_threadinfo)); + + error = fill_taskthreadinfo(p->task, threadaddr, (struct proc_threadinfo_internal *)pthinfo); + if (error) + return(ESRCH); + else + return(0); + +} + + +int +proc_pidlistthreads(proc_t p, user_addr_t buffer, uint32_t buffersize, register_t *retval) +{ + int count = 0; + int ret = 0; + int error = 0; + void * kbuf; + int numthreads; + + + count = buffersize/(sizeof(uint64_t)); + numthreads = get_numthreads(p->task); + + numthreads += 10; + + if (numthreads > count) + numthreads = count; + + kbuf = (void *)kalloc(numthreads * sizeof(uint64_t)); + bzero(kbuf, numthreads * sizeof(uint64_t)); + + ret = fill_taskthreadlist(p->task, kbuf, numthreads); + + error = copyout(kbuf, buffer, ret); + kfree(kbuf, numthreads * sizeof(uint64_t)); + if (error == 0) + *retval = ret; + return(error); + +} + + +int +proc_pidregioninfo(proc_t p, uint64_t arg, user_addr_t buffer, __unused uint32_t buffersize, register_t *retval) +{ + struct proc_regioninfo preginfo; + int ret, error = 0; + + bzero(&preginfo, sizeof(struct proc_regioninfo)); + ret = fill_procregioninfo( p->task, arg, (struct proc_regioninfo_internal *)&preginfo, (uint32_t *)0, (uint32_t *)0); + if (ret == 0) + return(EINVAL); + error = copyout(&preginfo, buffer, sizeof(struct proc_regioninfo)); + if (error == 0) + *retval = sizeof(struct proc_regioninfo); + return(error); +} + + +int +proc_pidregionpathinfo(proc_t p, uint64_t arg, user_addr_t buffer, __unused uint32_t buffersize, register_t *retval) +{ + struct proc_regionwithpathinfo 
preginfo; + int ret, error = 0; + uint32_t vnodeaddr= 0; + uint32_t vnodeid= 0; + vnode_t vp; + int count; + + bzero(&preginfo, sizeof(struct proc_regionwithpathinfo)); + + ret = fill_procregioninfo( p->task, arg, (struct proc_regioninfo_internal *)&preginfo.prp_prinfo, (uint32_t *)&vnodeaddr, (uint32_t *)&vnodeid); + if (ret == 0) + return(EINVAL); + if (vnodeaddr) { + vp = (vnode_t)vnodeaddr; + if ((vnode_getwithvid(vp, vnodeid)) == 0) { + /* FILL THE VNODEINFO */ + error = fill_vnodeinfo(vp, &preginfo.prp_vip.vip_vi); + count = MAXPATHLEN; + vn_getpath(vp, &preginfo.prp_vip.vip_path[0], &count); + /* Always make sure it is null terminated */ + preginfo.prp_vip.vip_path[MAXPATHLEN-1] = 0; + vnode_put(vp); + } + } + error = copyout(&preginfo, buffer, sizeof(struct proc_regionwithpathinfo)); + if (error == 0) + *retval = sizeof(struct proc_regionwithpathinfo); + return(error); +} + +int +proc_pidvnodepathinfo(proc_t p, __unused uint64_t arg, user_addr_t buffer, __unused uint32_t buffersize, register_t *retval) +{ + struct proc_vnodepathinfo pvninfo; + int error = 0; + vnode_t vncdirvp = NULLVP; + uint32_t vncdirid=0; + vnode_t vnrdirvp = NULLVP; + uint32_t vnrdirid=0; + int count; + + bzero(&pvninfo, sizeof(struct proc_vnodepathinfo)); + + proc_fdlock(p); + if (p->p_fd->fd_cdir) { + vncdirvp = p->p_fd->fd_cdir; + vncdirid = p->p_fd->fd_cdir->v_id; + } + if (p->p_fd->fd_rdir) { + vnrdirvp = p->p_fd->fd_rdir; + vnrdirid = p->p_fd->fd_rdir->v_id; + } + proc_fdunlock(p); + + if (vncdirvp != NULLVP) { + if ((error = vnode_getwithvid(vncdirvp, vncdirid)) == 0) { + /* FILL THE VNODEINFO */ + error = fill_vnodeinfo(vncdirvp, &pvninfo.pvi_cdir.vip_vi); + if ( error == 0) { + count = MAXPATHLEN; + vn_getpath(vncdirvp, &pvninfo.pvi_cdir.vip_path[0], &count); + pvninfo.pvi_cdir.vip_path[MAXPATHLEN-1] = 0; + } + vnode_put(vncdirvp); + } else { + goto out; + } + } + + if ((error == 0) && (vnrdirvp != NULLVP)) { + if ((error = vnode_getwithvid(vnrdirvp, vnrdirid)) == 0) { + /* 
FILL THE VNODEINFO */ + error = fill_vnodeinfo(vnrdirvp, &pvninfo.pvi_rdir.vip_vi); + if ( error == 0) { + count = MAXPATHLEN; + vn_getpath(vnrdirvp, &pvninfo.pvi_rdir.vip_path[0], &count); + pvninfo.pvi_rdir.vip_path[MAXPATHLEN-1] = 0; + } + vnode_put(vnrdirvp); + } else { + goto out; + } + } + if (error == 0) { + error = copyout(&pvninfo, buffer, sizeof(struct proc_vnodepathinfo)); + if (error == 0) + *retval = sizeof(struct proc_vnodepathinfo); + } +out: + return(error); +} + +/********************************** proc_pidinfo ********************************/ + + +int +proc_pidinfo(int pid, int flavor, uint64_t arg, user_addr_t buffer, uint32_t buffersize, register_t * retval) +{ + struct proc * p = PROC_NULL; + int error = ENOTSUP; + int gotref = 0; + int findzomb = 0; + boolean_t funnel_state; + uint32_t size; + + switch (flavor) { + case PROC_PIDLISTFDS: + size = PROC_PIDLISTFD_SIZE; + if (buffer == (user_addr_t)0) + size = 0; + break; + case PROC_PIDTBSDINFO: + size = PROC_PIDTBSDINFO_SIZE; + break; + case PROC_PIDTASKINFO: + size = PROC_PIDTASKINFO_SIZE; + break; + case PROC_PIDTASKALLINFO: + size = PROC_PIDTASKALLINFO_SIZE; + break; + case PROC_PIDTHREADINFO: + size = PROC_PIDTHREADINFO_SIZE; + break; + case PROC_PIDLISTTHREADS: + size = PROC_PIDLISTTHREADS_SIZE; + break; + case PROC_PIDREGIONINFO: + size = PROC_PIDREGIONINFO_SIZE; + break; + case PROC_PIDREGIONPATHINFO: + size = PROC_PIDREGIONPATHINFO_SIZE; + break; + case PROC_PIDVNODEPATHINFO: + size = PROC_PIDVNODEPATHINFO_SIZE; + break; + default: + return(EINVAL); + } + + if (buffersize < size) + return(ENOMEM); + + if (flavor != PROC_PIDTBSDINFO) { + if ((p = proc_findref(pid)) == PROC_NULL) { + error = ESRCH; + goto out; + } else { + gotref = 1; + + /* Do we have permission to look into this ? 
*/ + if ((error = proc_security_policy(p)) != 0) { + goto out; + } + } + } + switch (flavor) { + case PROC_PIDLISTFDS: { + error = proc_pidfdlist(p, buffer, buffersize, retval); + } + break; + + case PROC_PIDTBSDINFO: { + struct proc_bsdinfo pbsd; + + if (arg) + findzomb = 1; + funnel_state = thread_funnel_set(kernel_flock, TRUE); + p = pfind(pid); + if (p == PROC_NULL) { + if (findzomb) + p = pzfind(pid); + if (p == NULL) { + error = ESRCH; + thread_funnel_set(kernel_flock, funnel_state); + goto out; + } + } + /* Do we have permission to look into this ? */ + if ((error = proc_security_policy(p)) != 0) { + thread_funnel_set(kernel_flock, funnel_state); + goto out; + } + error = proc_pidbsdinfo(p, &pbsd); + thread_funnel_set(kernel_flock, funnel_state); + if (error == 0) { + error = copyout(&pbsd, buffer, sizeof(struct proc_bsdinfo)); + if (error == 0) + *retval = sizeof(struct proc_bsdinfo); + } + } + break; + + case PROC_PIDTASKINFO: { + struct proc_taskinfo ptinfo; + + error = proc_pidtaskinfo(p, &ptinfo); + if (error == 0) { + error = copyout(&ptinfo, buffer, sizeof(struct proc_taskinfo)); + if (error == 0) + *retval = sizeof(struct proc_taskinfo); + } + } + break; + + case PROC_PIDTASKALLINFO: { + struct proc_taskallinfo pall; + + error = proc_pidbsdinfo(p, &pall.pbsd); + error = proc_pidtaskinfo(p, &pall.ptinfo); + if (error == 0) { + error = copyout(&pall, buffer, sizeof(struct proc_taskallinfo)); + if (error == 0) + *retval = sizeof(struct proc_taskallinfo); + } + } + break; + + case PROC_PIDTHREADINFO:{ + struct proc_threadinfo pthinfo; + + error = proc_pidthreadinfo(p, arg, &pthinfo); + if (error == 0) { + error = copyout(&pthinfo, buffer, sizeof(struct proc_threadinfo)); + if (error == 0) + *retval = sizeof(struct proc_threadinfo); + } + } + break; + + case PROC_PIDLISTTHREADS:{ + error = proc_pidlistthreads(p, buffer, buffersize, retval); + } + break; + + case PROC_PIDREGIONINFO:{ + error = proc_pidregioninfo(p, arg, buffer, buffersize, retval); + } + 
break; + + + case PROC_PIDREGIONPATHINFO:{ + error = proc_pidregionpathinfo(p, arg, buffer, buffersize, retval); + } + break; + + case PROC_PIDVNODEPATHINFO:{ + error = proc_pidvnodepathinfo(p, arg, buffer, buffersize, retval); + } + break; + + default: + error = ENOTSUP; + } + +out: + if (gotref) + proc_dropref(p); + return(error); +} + + +int +pid_vnodeinfo(vnode_t vp, uint32_t vid, struct fileproc * fp, user_addr_t buffer, __unused uint32_t buffersize, register_t * retval) +{ + struct vnode_fdinfo vfi; + int error= 0; + + if ((error = vnode_getwithvid(vp, vid)) != 0) { + return(error); + } + bzero(&vfi, sizeof(struct vnode_fdinfo)); + fill_fileinfo(fp, &vfi.pfi); + error = fill_vnodeinfo(vp, &vfi.pvi); + vnode_put(vp); + if (error == 0) { + error = copyout((caddr_t)&vfi, buffer, sizeof(struct vnode_fdinfo)); + if (error == 0) + *retval = sizeof(struct vnode_fdinfo); + } + return(error); +} + +int +pid_vnodeinfopath(vnode_t vp, uint32_t vid, struct fileproc * fp, user_addr_t buffer, __unused uint32_t buffersize, register_t * retval) +{ + struct vnode_fdinfowithpath vfip; + int count, error= 0; + + if ((error = vnode_getwithvid(vp, vid)) != 0) { + return(error); + } + bzero(&vfip, sizeof(struct vnode_fdinfowithpath)); + fill_fileinfo(fp, &vfip.pfi); + error = fill_vnodeinfo(vp, &vfip.pvip.vip_vi) ; + if (error == 0) { + count = MAXPATHLEN; + vn_getpath(vp, &vfip.pvip.vip_path[0], &count); + vfip.pvip.vip_path[MAXPATHLEN-1] = 0; + vnode_put(vp); + error = copyout((caddr_t)&vfip, buffer, sizeof(struct vnode_fdinfowithpath)); + if (error == 0) + *retval = sizeof(struct vnode_fdinfowithpath); + } else + vnode_put(vp); + return(error); +} + +void +fill_fileinfo(struct fileproc * fp, struct proc_fileinfo * fproc) +{ + fproc->fi_openflags = fp->f_fglob->fg_flag; + fproc->fi_status = fp->f_flags; + fproc->fi_offset = fp->f_fglob->fg_offset; + fproc->fi_type = fp->f_fglob->fg_type; +} + + + +int +fill_vnodeinfo(vnode_t vp, struct vnode_info *vinfo) +{ + vfs_context_t 
context; + struct stat * sb; + int error = 0; + + sb = &vinfo->vi_stat; + + context = vfs_context_create((vfs_context_t)0); + error = vn_stat(vp, sb, NULL, context); + (void)vfs_context_rele(context); + + if (error != 0) + goto out; + + if (vp->v_mount != dead_mountp) { + vinfo->vi_fsid = vp->v_mount->mnt_vfsstat.f_fsid; + } else { + vinfo->vi_fsid.val[0] = 0; + vinfo->vi_fsid.val[1] = 0; + } + vinfo->vi_type = vp->v_type; +out: + return(error); +} + +int +pid_socketinfo(socket_t so, struct fileproc *fp, user_addr_t buffer, __unused uint32_t buffersize, register_t * retval) +{ + struct socket_fdinfo s; + int error = 0; + + bzero(&s, sizeof(struct socket_fdinfo)); + fill_fileinfo(fp, &s.pfi); + if ((error = fill_socketinfo(so, &s.psi)) == 0) { + if ((error = copyout(&s, buffer, sizeof(struct socket_fdinfo))) == 0) + *retval = sizeof(struct socket_fdinfo); + } + + return (error); +} + +int +pid_pseminfo(struct psemnode *psem, struct fileproc *fp, user_addr_t buffer, __unused uint32_t buffersize, register_t * retval) +{ + struct psem_fdinfo pseminfo; + int error = 0; + + bzero(&pseminfo, sizeof(struct psem_fdinfo)); + fill_fileinfo(fp, &pseminfo.pfi); + + if ((error = fill_pseminfo(psem, &pseminfo.pseminfo)) == 0) { + if ((error = copyout(&pseminfo, buffer, sizeof(struct psem_fdinfo))) == 0) + *retval = sizeof(struct psem_fdinfo); + } + + return(error); +} + +int +pid_pshminfo(struct pshmnode *pshm, struct fileproc *fp, user_addr_t buffer, __unused uint32_t buffersize, register_t * retval) +{ + struct pshm_fdinfo pshminfo; + int error = 0; + + bzero(&pshminfo, sizeof(struct pshm_fdinfo)); + fill_fileinfo(fp, &pshminfo.pfi); + + if ((error = fill_pshminfo(pshm, &pshminfo.pshminfo)) == 0) { + if ((error = copyout(&pshminfo, buffer, sizeof(struct pshm_fdinfo))) == 0) + *retval = sizeof(struct pshm_fdinfo); + } + + return(error); +} + +int +pid_pipeinfo(struct pipe * p, struct fileproc *fp, user_addr_t buffer, __unused uint32_t buffersize, register_t * retval) +{ + struct 
pipe_fdinfo pipeinfo; + int error = 0; + + bzero(&pipeinfo, sizeof(struct pipe_fdinfo)); + fill_fileinfo(fp, &pipeinfo.pfi); + if ((error = fill_pipeinfo(p, &pipeinfo.pipeinfo)) == 0) { + if ((error = copyout(&pipeinfo, buffer, sizeof(struct pipe_fdinfo))) == 0) + *retval = sizeof(struct pipe_fdinfo); + } + + return(error); +} + +int +pid_kqueueinfo(struct kqueue * kq, struct fileproc *fp, user_addr_t buffer, __unused uint32_t buffersize, register_t * retval) +{ + struct kqueue_fdinfo kqinfo; + int error = 0; + + bzero(&kqinfo, sizeof(struct kqueue_fdinfo)); + + fill_fileinfo(fp, &kqinfo.pfi); + + if ((error = fill_kqueueinfo(kq, &kqinfo.kqueueinfo)) == 0) { + if ((error = copyout(&kqinfo, buffer, sizeof(struct kqueue_fdinfo))) == 0) + *retval = sizeof(struct kqueue_fdinfo); + } + + return(error); +} + +int +pid_atalkinfo(__unused struct atalk * at, __unused struct fileproc *fp, __unused user_addr_t buffer, __unused uint32_t buffersize, __unused register_t * retval) +{ + return ENOTSUP; +} + + + +/************************** proc_pidfdinfo routine ***************************/ +int +proc_pidfdinfo(int pid, int flavor, int fd, user_addr_t buffer, uint32_t buffersize, register_t * retval) +{ + proc_t p; + int error = ENOTSUP; + struct fileproc * fp; + uint32_t size; + + + switch (flavor) { + case PROC_PIDFDVNODEINFO: + size = PROC_PIDFDVNODEINFO_SIZE; + break; + case PROC_PIDFDVNODEPATHINFO: + size = PROC_PIDFDVNODEPATHINFO_SIZE; + break; + case PROC_PIDFDSOCKETINFO: + size = PROC_PIDFDSOCKETINFO_SIZE; + break; + case PROC_PIDFDPSEMINFO: + size = PROC_PIDFDPSEMINFO_SIZE; + break; + case PROC_PIDFDPSHMINFO: + size = PROC_PIDFDPSHMINFO_SIZE; + break; + case PROC_PIDFDPIPEINFO: + size = PROC_PIDFDPIPEINFO_SIZE; + break; + case PROC_PIDFDKQUEUEINFO: + size = PROC_PIDFDKQUEUEINFO_SIZE; + break; + case PROC_PIDFDATALKINFO: + size = PROC_PIDFDATALKINFO_SIZE; + break; + + default: + return(EINVAL); + + } + + if (buffersize < size) + return(ENOMEM); + + if ((p = 
proc_findref(pid)) == PROC_NULL) { + error = ESRCH; + goto out; + } + /* Do we have permission to look into this ? */ + if ((error = proc_security_policy(p)) != 0) { + goto out1; + } + + switch (flavor) { + case PROC_PIDFDVNODEINFO: { + vnode_t vp; + uint32_t vid=0; + + if ((error = fp_getfvpandvid(p, fd, &fp, &vp, &vid)) !=0) { + goto out1; + } + error = pid_vnodeinfo(vp, vid, fp, buffer, buffersize, retval); + } + break; + + case PROC_PIDFDVNODEPATHINFO: { + vnode_t vp; + uint32_t vid=0; + + if ((error = fp_getfvpandvid(p, fd, &fp, &vp, &vid)) !=0) { + goto out1; + } + + error = pid_vnodeinfopath(vp, vid, fp, buffer, buffersize, retval); + } + break; + + case PROC_PIDFDSOCKETINFO: { + socket_t so; + + if ((error = fp_getfsock(p, fd, &fp, &so)) !=0) { + goto out1; + } + error = pid_socketinfo(so, fp, buffer, buffersize, retval); + } + break; + + case PROC_PIDFDPSEMINFO: { + struct psemnode * psem; + + if ((error = fp_getfpsem(p, fd, &fp, &psem)) !=0) { + goto out1; + } + error = pid_pseminfo(psem, fp, buffer, buffersize, retval); + } + break; + + case PROC_PIDFDPSHMINFO: { + struct pshmnode * pshm; + + if ((error = fp_getfpshm(p, fd, &fp, &pshm)) !=0) { + goto out1; + } + error = pid_pshminfo(pshm, fp, buffer, buffersize, retval); + } + break; + + case PROC_PIDFDPIPEINFO: { + struct pipe * cpipe; + + if ((error = fp_getfpipe(p, fd, &fp, &cpipe)) !=0) { + goto out1; + } + error = pid_pipeinfo(cpipe, fp, buffer, buffersize, retval); + } + break; + + case PROC_PIDFDKQUEUEINFO: { + struct kqueue * kq; + + if ((error = fp_getfkq(p, fd, &fp, &kq)) !=0) { + goto out1; + } + error = pid_kqueueinfo(kq, fp, buffer, buffersize, retval); + } + break; + + case PROC_PIDFDATALKINFO: { + struct atalk * at; + + if ((error = fp_getfatalk(p, fd, &fp, &at)) !=0) { + goto out1; + } + + error = pid_atalkinfo(at, fp, buffer, buffersize, retval); + } + break; + + default: { + error = EINVAL; + } + break; + + } + + fp_drop(p, fd, fp , 0); +out1 : + proc_dropref(p); +out: + return(error); 
+} + + +static int +proc_security_policy(proc_t p) +{ + if ((kauth_cred_getuid(p->p_ucred) != kauth_cred_getuid(kauth_cred_get())) + && suser(kauth_cred_get(), (u_short *)0)) { + return(EPERM); + } + + return(0); +} + + +/* Temporary hack to get dmesg to work. In Leopard this will disappear */ +int +proc_kernmsgbuf(user_addr_t buffer, uint32_t buffersize, register_t * retval) +{ + int error; + + if (buffersize < sizeof(struct msgbuf)) + return(ENOMEM); + + if (suser(kauth_cred_get(), (u_short *)0) == 0) { + error = copyout(msgbufp, buffer, sizeof(struct msgbuf)); + if (error == 0) + *retval = sizeof(struct msgbuf); + return(error); + } else + return(EPERM); +} + diff --git a/bsd/kern/socket_info.c b/bsd/kern/socket_info.c new file mode 100644 index 000000000..179f95104 --- /dev/null +++ b/bsd/kern/socket_info.c @@ -0,0 +1,241 @@ +/* + * Copyright (c) 2005 Apple Computer, Inc. All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * The contents of this file constitute Original Code as defined in and + * are subject to the Apple Public Source License Version 1.1 (the + * "License"). You may not use this file except in compliance with the + * License. Please obtain a copy of the License at + * http://www.apple.com/publicsource and read it before using this file. + * + * This Original Code and all software distributed under the License are + * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the + * License for the specific language governing rights and limitations + * under the License. 
+ * + * @APPLE_LICENSE_HEADER_END@ + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static void fill_sockbuf_info(struct sockbuf *sb, struct sockbuf_info *sbi); +static void fill_common_sockinfo(struct socket *so, struct socket_info *si); +/* Copy the counters, limits and flags of sb into sbi; sb_timeo is scaled via hz and tick, and a nonzero usec value that would scale to 0 is reported as 1. */ +static void +fill_sockbuf_info(struct sockbuf *sb, struct sockbuf_info *sbi) +{ + sbi->sbi_cc = sb->sb_cc; + sbi->sbi_hiwat = sb->sb_hiwat; + sbi->sbi_mbcnt = sb->sb_mbcnt; + sbi->sbi_mbmax = sb->sb_mbmax; + sbi->sbi_lowat = sb->sb_lowat; + sbi->sbi_flags = sb->sb_flags; + sbi->sbi_timeo = (u_long)(sb->sb_timeo.tv_sec * hz) + sb->sb_timeo.tv_usec / tick; + if (sbi->sbi_timeo == 0 && sb->sb_timeo.tv_usec != 0) + sbi->sbi_timeo = 1; +} +/* Copy the protocol-independent fields of so (and both of its socket buffers) into si; protocol and family are 0 when so_proto or its domain is missing. */ +static void +fill_common_sockinfo(struct socket *so, struct socket_info *si) +{ + si->soi_so = (u_int64_t)((uintptr_t)so); + si->soi_type = so->so_type; + si->soi_options = so->so_options; + si->soi_linger = so->so_linger; + si->soi_state = so->so_state; + si->soi_pcb = (u_int64_t)((uintptr_t)so->so_pcb); + if (so->so_proto) { + si->soi_protocol = so->so_proto->pr_protocol; + if (so->so_proto->pr_domain) + si->soi_family = so->so_proto->pr_domain->dom_family; + else + si->soi_family = 0; + } else + si->soi_protocol = si->soi_family = 0; + si->soi_qlen = so->so_qlen; + si->soi_incqlen = so->so_incqlen; + si->soi_qlimit = so->so_qlimit; + si->soi_timeo = so->so_timeo; + si->soi_error = so->so_error; + si->soi_oobmark = so->so_oobmark; + fill_sockbuf_info(&so->so_snd, &si->soi_snd); + fill_sockbuf_info(&so->so_rcv, &si->soi_rcv); + +} +/* Under the socket lock, fill si with the generic socket state plus the per-protocol details selected by {family, type, protocol}; soi_kind records which variant was filled. Always returns 0. */ +errno_t +fill_socketinfo(struct socket *so, struct socket_info *si) +{ + errno_t error = 0; + int family; + short type; + short protocol; + + socket_lock(so, 0); + + si->soi_kind = SOCKINFO_GENERIC; + + fill_common_sockinfo(so, si); + + if (so->so_pcb == 0 || so->so_proto == 0 || so->so_proto->pr_domain == 
0) + goto out; + + /* The kind of socket is determined by the triplet {family, type, protocol} */ + family = so->so_proto->pr_domain->dom_family; + type = so->so_proto->pr_type; + protocol = so->so_proto->pr_protocol; + switch (family) { + case AF_INET: + case AF_INET6: { + struct in_sockinfo *insi = &si->soi_proto.pri_in; + struct inpcb *inp = (struct inpcb *)so->so_pcb; + + si->soi_kind = SOCKINFO_IN; + + insi->insi_fport = inp->inp_fport; + insi->insi_lport = inp->inp_lport; + insi->insi_gencnt = inp->inp_gencnt; + insi->insi_flags = inp->inp_flags; + insi->insi_vflag = inp->inp_vflag; + insi->insi_ip_ttl = inp->inp_ip_ttl; + insi->insi_faddr.ina_6 = inp->inp_dependfaddr.inp6_foreign; + insi->insi_laddr.ina_6 = inp->inp_dependladdr.inp6_local; + insi->insi_v4.in4_tos = inp->inp_depend4.inp4_ip_tos; + insi->insi_v6.in6_hlim = inp->inp_depend6.inp6_hlim; + insi->insi_v6.in6_cksum = inp->inp_depend6.inp6_cksum; + insi->insi_v6.in6_ifindex = inp->inp6_ifindex; + insi->insi_v6.in6_hops = inp->inp_depend6.inp6_hops; + + if (type == SOCK_STREAM && (protocol == 0 || protocol == IPPROTO_TCP) && inp->inp_ppcb != 0) { + struct tcp_sockinfo *tcpsi = &si->soi_proto.pri_tcp; + struct tcpcb *tp= (struct tcpcb *)inp->inp_ppcb; + + si->soi_kind = SOCKINFO_TCP; + + tcpsi->tcpsi_state = tp->t_state; + tcpsi->tcpsi_timer[TCPT_REXMT] = tp->t_timer[TCPT_REXMT]; + tcpsi->tcpsi_timer[TCPT_PERSIST] = tp->t_timer[TCPT_PERSIST]; + tcpsi->tcpsi_timer[TCPT_KEEP] = tp->t_timer[TCPT_KEEP]; + tcpsi->tcpsi_timer[TCPT_2MSL] = tp->t_timer[TCPT_2MSL]; + tcpsi->tcpsi_mss = tp->t_maxseg; + tcpsi->tcpsi_flags = tp->t_flags; + tcpsi->tcpsi_tp = (u_int64_t)((uintptr_t)tp); + } + break; + } + case AF_UNIX: { + struct unpcb *unp = (struct unpcb *)so->so_pcb; + struct un_sockinfo *unsi = &si->soi_proto.pri_un; + + si->soi_kind = SOCKINFO_UN; + + unsi->unsi_conn_pcb = (uint64_t)((uintptr_t)unp->unp_conn); + if (unp->unp_conn) + unsi->unsi_conn_so = (uint64_t)((uintptr_t)unp->unp_conn->unp_socket); + + + if 
(unp->unp_addr) { + size_t addrlen = unp->unp_addr->sun_len; + + if (addrlen > SOCK_MAXADDRLEN) + addrlen = SOCK_MAXADDRLEN; + bcopy(unp->unp_addr, &unsi->unsi_addr, addrlen); + } + if (unp->unp_conn && unp->unp_conn->unp_addr) { + size_t addrlen = unp->unp_conn->unp_addr->sun_len; + + if (addrlen > SOCK_MAXADDRLEN) + addrlen = SOCK_MAXADDRLEN; + bcopy(unp->unp_conn->unp_addr, &unsi->unsi_caddr, addrlen); + } + break; + } + case AF_NDRV: { + struct ndrv_cb *ndrv_cb = (struct ndrv_cb *)so->so_pcb; + struct ndrv_info *ndrvsi = &si->soi_proto.pri_ndrv; + + si->soi_kind = SOCKINFO_NDRV; + + /* TBD lock ifnet ???? */ + if (ndrv_cb->nd_if != 0) { + struct ifnet *ifp = ndrv_cb->nd_if; + + ndrvsi->ndrvsi_if_family = ifp->if_family; + ndrvsi->ndrvsi_if_unit = ifp->if_unit; + strncpy(ndrvsi->ndrvsi_if_name, ifp->if_name, IFNAMSIZ); + } + + break; + } + case AF_SYSTEM: + if (so->so_proto->pr_protocol == SYSPROTO_EVENT) { + struct kern_event_pcb *ev_pcb = (struct kern_event_pcb *)so->so_pcb; + struct kern_event_info *kesi = &si->soi_proto.pri_kern_event; + + si->soi_kind = SOCKINFO_KERN_EVENT; + + kesi->kesi_vendor_code_filter = ev_pcb->vendor_code_filter; + kesi->kesi_class_filter = ev_pcb->class_filter; + kesi->kesi_subclass_filter = ev_pcb->subclass_filter; + + } else if (so->so_proto->pr_protocol == SYSPROTO_CONTROL) { + struct ctl_cb *kcb = (struct ctl_cb *)so->so_pcb; + struct kern_ctl_info *kcsi = &si->soi_proto.pri_kern_ctl; + struct kctl *kctl = kcb->kctl; + + + si->soi_kind = SOCKINFO_KERN_CTL; + + if (kctl == 0) + break; + kcsi->kcsi_id = kctl->id; + kcsi->kcsi_reg_unit = kctl->reg_unit; /* the registered unit, not a second copy of the id */ + kcsi->kcsi_flags = kctl->flags; + kcsi->kcsi_recvbufsize = kctl->recvbufsize; + kcsi->kcsi_sendbufsize = kctl->sendbufsize; + kcsi->kcsi_unit = kcb->unit; + strncpy(kcsi->kcsi_name, kctl->name, MAX_KCTL_NAME); + } + break; + + case AF_APPLETALK: + break; + + case AF_ROUTE: + break; + + case AF_PPP: + break; + + default: + break; + } +out: + socket_unlock(so, 0); + + return error; +} + 
diff --git a/bsd/kern/subr_prof.c b/bsd/kern/subr_prof.c index 9b3791f0a..f0151a07d 100644 --- a/bsd/kern/subr_prof.c +++ b/bsd/kern/subr_prof.c @@ -137,7 +137,6 @@ kmstartup(void) mcount_lock_grp = lck_grp_alloc_init("MCOUNT", LCK_GRP_ATTR_NULL); mcount_lock_attr = lck_attr_alloc_init(); - //lck_attr_setdebug(mcount_lock_attr); mcount_lock = lck_spin_alloc_init(mcount_lock_grp, mcount_lock_attr); } diff --git a/bsd/kern/sys_generic.c b/bsd/kern/sys_generic.c index 44ee91c82..1b92b6f00 100644 --- a/bsd/kern/sys_generic.c +++ b/bsd/kern/sys_generic.c @@ -1700,8 +1700,6 @@ selthreadclear(sip) -#define DBG_EVENT 0x10 - #define DBG_POST 0x10 #define DBG_WATCH 0x11 #define DBG_WAIT 0x12 diff --git a/bsd/kern/sys_pipe.c b/bsd/kern/sys_pipe.c index 2fb396aa0..d9c75707c 100644 --- a/bsd/kern/sys_pipe.c +++ b/bsd/kern/sys_pipe.c @@ -105,6 +105,7 @@ #include #include #include +#include #include @@ -260,14 +261,12 @@ pipeinit(void *dummy __unused) * allocate lock group attribute and group for pipe mutexes */ pipe_mtx_grp_attr = lck_grp_attr_alloc_init(); - //lck_grp_attr_setstat(pipe_mtx_grp_attr); pipe_mtx_grp = lck_grp_alloc_init("pipe", pipe_mtx_grp_attr); /* * allocate the lock attribute for pipe mutexes */ pipe_mtx_attr = lck_attr_alloc_init(); - //lck_attr_setdebug(pipe_mtx_attr); } @@ -1410,7 +1409,7 @@ pipe_free_kmem(struct pipe *cpipe) if (cpipe->pipe_buffer.buffer != NULL) { if (cpipe->pipe_buffer.size > PIPE_SIZE) OSAddAtomic(-1, (SInt32 *)&nbigpipe); - OSAddAtomic(cpipe->pipe_buffer.size, (SInt32 *)&amountpipekva); + OSAddAtomic(-(cpipe->pipe_buffer.size), (SInt32 *)&amountpipekva); OSAddAtomic(-1, (SInt32 *)&amountpipes); kmem_free(kernel_map, (vm_offset_t)cpipe->pipe_buffer.buffer, @@ -1644,3 +1643,66 @@ filt_pipewrite(struct knote *kn, long hint) return (kn->kn_data >= ((kn->kn_sfflags & NOTE_LOWAT) ? 
kn->kn_sdata : PIPE_BUF)); } +/* Fill pinfo with a stat-style snapshot of cpipe, switching to its peer when cpipe has no buffer; returns 0, or EBADF when the pipe (or that peer) is NULL. */ +int +fill_pipeinfo(struct pipe * cpipe, struct pipe_info * pinfo) +{ +#ifdef MAC + int error; +#endif + struct timeval now; + struct stat * ub; + + if (cpipe == NULL) + return (EBADF); +#ifdef MAC + PIPE_LOCK(cpipe); + error = mac_check_pipe_stat(active_cred, cpipe); + PIPE_UNLOCK(cpipe); + if (error) + return (error); +#endif + if (cpipe->pipe_buffer.buffer == 0) { + /* + * must be stat'ing the write fd + */ + cpipe = cpipe->pipe_peer; + + if (cpipe == NULL) + return (EBADF); + } + + ub = &pinfo->pipe_stat; + + bzero(ub, sizeof(*ub)); + ub->st_mode = S_IFIFO | S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP; + ub->st_blksize = cpipe->pipe_buffer.size; + ub->st_size = cpipe->pipe_buffer.cnt; + if (ub->st_blksize != 0) /* st_blocks stays 0 (from bzero) rather than dividing by zero */ + ub->st_blocks = (ub->st_size + ub->st_blksize - 1) / ub->st_blksize; + ub->st_nlink = 1; + + ub->st_uid = kauth_getuid(); + ub->st_gid = kauth_getgid(); + + microtime(&now); + ub->st_atimespec.tv_sec = now.tv_sec; + ub->st_atimespec.tv_nsec = now.tv_usec * 1000; + + ub->st_mtimespec.tv_sec = now.tv_sec; + ub->st_mtimespec.tv_nsec = now.tv_usec * 1000; + + ub->st_ctimespec.tv_sec = now.tv_sec; + ub->st_ctimespec.tv_nsec = now.tv_usec * 1000; + + /* + * Left as 0: st_dev, st_ino, st_rdev, st_flags, st_gen. + * XXX (st_dev, st_ino) should be unique. 
+ */ + + pinfo->pipe_handle = (uint64_t)((uintptr_t)cpipe); + pinfo->pipe_peerhandle = (uint64_t)((uintptr_t)(cpipe->pipe_peer)); + pinfo->pipe_status = cpipe->pipe_state; + return (0); +} + diff --git a/bsd/kern/syscalls.c b/bsd/kern/syscalls.c index d733cfc88..503f43f00 100644 --- a/bsd/kern/syscalls.c +++ b/bsd/kern/syscalls.c @@ -134,11 +134,7 @@ const char *syscallnames[] = { "getpriority", /* 100 = getpriority */ "#101", /* 101 = old send */ "#102", /* 102 = old recv */ -#ifdef __ppc__ "#103", /* 103 = old sigreturn */ -#else - "sigreturn", /* 103 = sigreturn */ -#endif "bind", /* 104 = bind */ "setsockopt", /* 105 = setsockopt */ "listen", /* 106 = listen */ @@ -151,11 +147,7 @@ const char *syscallnames[] = { "#113", /* 113 = old recvmsg */ "#114", /* 114 = old sendmsg */ "#115", /* 115 = old vtrace */ -#ifdef __ppc__ - "ppc_gettimeofday", /* 116 = ppc_gettimeofday */ -#else "gettimeofday", /* 116 = gettimeofday */ -#endif "getrusage", /* 117 = getrusage */ "getsockopt", /* 118 = getsockopt */ "#119", /* 119 = old resuba */ @@ -231,12 +223,8 @@ const char *syscallnames[] = { "setgid", /* 181 = setgid */ "setegid", /* 182 = setegid */ "seteuid", /* 183 = seteuid */ -#ifdef __ppc__ "sigreturn", /* 184 = sigreturn */ -#else - "#184", /* 184 = */ -#endif - "#185", /* 185 = */ + "chud", /* 185 = chud */ "#186", /* 186 = */ "#187", /* 187 = */ "stat", /* 188 = stat */ @@ -261,16 +249,6 @@ const char *syscallnames[] = { "mlock", /* 203 = mlock */ "munlock", /* 204 = munlock */ "undelete", /* 205 = undelete */ -#ifdef __ppc__ - "ATsocket", /* 206 = ATsocket */ - "ATgetmsg", /* 207 = ATgetmsg */ - "ATputmsg", /* 208 = ATputmsg */ - "ATPsndreq", /* 209 = ATPsndreq */ - "ATPsndrsp", /* 210 = ATPsndrsp */ - "ATPgetreq", /* 211 = ATPgetreq */ - "ATPgetrsp", /* 212 = ATPgetrsp */ - "#213", /* 213 = Reserved for AppleTalk */ -#else "ATsocket", /* 206 = ATsocket */ "ATgetmsg", /* 207 = ATgetmsg */ "ATputmsg", /* 208 = ATputmsg */ @@ -279,7 +257,6 @@ const char 
*syscallnames[] = { "ATPgetreq", /* 211 = ATPgetreq */ "ATPgetrsp", /* 212 = ATPgetrsp */ "#213", /* 213 = Reserved for AppleTalk */ -#endif /* __ppc__ */ "kqueue_from_portset_np", /* 214 = kqueue_from_portset_np */ "kqueue_portset_np", /* 215 = kqueue_portset_np */ "mkcomplex", /* 216 = mkcomplex soon to be obsolete */ @@ -411,7 +388,7 @@ const char *syscallnames[] = { "__pthread_canceled", /* 333 = __pthread_canceled */ "__semwait_signal", /* 334 = __semwait_signal */ "utrace", /* 335 = utrace */ - "#336", /* 336 = */ + "proc_info", /* 336 = proc_info */ "#337", /* 337 = */ "#338", /* 338 = */ "#339", /* 339 = */ @@ -440,7 +417,7 @@ const char *syscallnames[] = { "kqueue", /* 362 = kqueue */ "kevent", /* 363 = kevent */ "lchown", /* 364 = lchown */ - "#365", /* 365 = */ + "stack_snapshot", /* 365 = stack_snapshot */ "#366", /* 366 = */ "#367", /* 367 = */ "#368", /* 368 = */ diff --git a/bsd/kern/syscalls.master b/bsd/kern/syscalls.master index a051ba6cf..0c0008058 100644 --- a/bsd/kern/syscalls.master +++ b/bsd/kern/syscalls.master @@ -136,13 +136,7 @@ 100 NONE KERN ALL { int getpriority(int which, int who); } 101 NONE NONE ALL { int nosys(void); } { old send } 102 NONE NONE ALL { int nosys(void); } { old recv } - -#ifdef __ppc__ 103 NONE NONE ALL { int nosys(void); } { old sigreturn } -#else -103 NONE KERN UALL { int sigreturn(struct sigcontext *sigcntxp); } -#endif - 104 NONE NONE ALL { int bind(int s, caddr_t name, socklen_t namelen); } 105 NONE NONE ALL { int setsockopt(int s, int level, int name, caddr_t val, socklen_t valsize); } 106 NONE NONE ALL { int listen(int s, int backlog); } @@ -155,13 +149,7 @@ 113 NONE NONE ALL { int nosys(void); } { old recvmsg } 114 NONE NONE ALL { int nosys(void); } { old sendmsg } 115 NONE NONE ALL { int nosys(void); } { old vtrace } - -#ifdef __ppc__ -116 NONE NONE ALL { int ppc_gettimeofday(struct timeval *tp, struct timezone *tzp); } -#else 116 NONE NONE ALL { int gettimeofday(struct timeval *tp, struct timezone *tzp); } 
-#endif - 117 NONE KERN ALL { int getrusage(int who, struct rusage *rusage); } 118 NONE NONE ALL { int getsockopt(int s, int level, int name, caddr_t val, socklen_t *avalsize); } 119 NONE NONE ALL { int nosys(void); } { old resuba } @@ -241,14 +229,8 @@ 181 NONE KERN ALL { int setgid(gid_t gid); } 182 NONE KERN ALL { int setegid(gid_t egid); } 183 NONE KERN ALL { int seteuid(uid_t euid); } - -#ifdef __ppc__ 184 NONE KERN ALL { int sigreturn(struct ucontext *uctx, int infostyle); } -#else -184 NONE NONE ALL { int nosys(void); } -#endif - -185 NONE NONE ALL { int nosys(void); } +185 NONE NONE UALL { int chud(int code, int arg1, int arg2, int arg3, int arg4, int arg5); } 186 NONE NONE ALL { int nosys(void); } 187 NONE NONE ALL { int nosys(void); } 188 NONE NONE ALL { int stat(user_addr_t path, user_addr_t ub); } @@ -276,7 +258,6 @@ 204 NONE NONE ALL { int munlock(caddr_t addr, size_t len); } 205 NONE NONE ALL { int undelete(user_addr_t path); } -#ifdef __ppc__ 206 NONE NONE ALL { int ATsocket(int proto); } 207 NONE NONE UALL { int ATgetmsg(int fd, void *ctlptr, void *datptr, int *flags); } 208 NONE NONE UALL { int ATputmsg(int fd, void *ctlptr, void *datptr, int flags); } @@ -285,16 +266,6 @@ 211 NONE NONE UALL { int ATPgetreq(int fd, unsigned char *buf, int buflen); } 212 NONE NONE UALL { int ATPgetrsp(int fd, unsigned char *bdsp); } 213 NONE NONE ALL { int nosys(void); } { Reserved for AppleTalk } -#else -206 NONE NONE HN { int ATsocket(int proto); } -207 NONE NONE UHN { int ATgetmsg(int fd, void *ctlptr, void *datptr, int *flags); } -208 NONE NONE UHN { int ATputmsg(int fd, void *ctlptr, void *datptr, int flags); } -209 NONE NONE UHN { int ATPsndreq(int fd, unsigned char *buf, int len, int nowait); } -210 NONE NONE UHN { int ATPsndrsp(int fd, unsigned char *respbuff, int resplen, int datalen); } -211 NONE NONE UHN { int ATPgetreq(int fd, unsigned char *buf, int buflen); } -212 NONE NONE UHN { int ATPgetrsp(int fd, unsigned char *bdsp); } -213 NONE NONE ALL { int 
nosys(void); } { Reserved for AppleTalk } -#endif /* __ppc__ */ 214 NONE KERN ALL { int kqueue_from_portset_np(int portset); } 215 NONE KERN ALL { int kqueue_portset_np(int fd); } @@ -438,7 +409,7 @@ 333 NONE NONE ALL { int __pthread_canceled(int action); } 334 POST NONE ALL { int __semwait_signal(int cond_sem, int mutex_sem, int timeout, int relative, time_t tv_sec, int32_t tv_nsec); } 335 NONE KERN ALL { int utrace(const void *addr, size_t len); } -336 NONE NONE ALL { int nosys(void); } +336 NONE NONE ALL { int proc_info(int32_t callnum,int32_t pid,uint32_t flavor, uint64_t arg,user_addr_t buffer,int32_t buffersize); } 337 NONE NONE ALL { int nosys(void); } 338 NONE NONE ALL { int nosys(void); } 339 NONE NONE ALL { int nosys(void); } @@ -467,7 +438,7 @@ 362 NONE NONE ALL { int kqueue(void); } 363 NONE NONE ALL { int kevent(int fd, const struct kevent *changelist, int nchanges, struct kevent *eventlist, int nevents, const struct timespec *timeout); } 364 NONE NONE ALL { int lchown(user_addr_t path, uid_t owner, gid_t group); } -365 NONE NONE ALL { int nosys(void); } +365 NONE NONE ALL { int stack_snapshot(pid_t pid, user_addr_t tracebuf, uint32_t tracebuf_size, uint32_t options); } 366 NONE NONE ALL { int nosys(void); } 367 NONE NONE ALL { int nosys(void); } 368 NONE NONE ALL { int nosys(void); } diff --git a/bsd/kern/sysctl_init.c b/bsd/kern/sysctl_init.c index 42ac8142c..4ba803528 100644 --- a/bsd/kern/sysctl_init.c +++ b/bsd/kern/sysctl_init.c @@ -50,9 +50,12 @@ extern struct sysctl_oid sysctl__hw_logicalcpu_max; extern struct sysctl_oid sysctl__hw_byteorder; extern struct sysctl_oid sysctl__hw_cputype; extern struct sysctl_oid sysctl__hw_cpusubtype; +extern struct sysctl_oid sysctl__hw_cpufamily; +extern struct sysctl_oid sysctl__hw_cacheconfig; extern struct sysctl_oid sysctl__hw_physmem; extern struct sysctl_oid sysctl__hw_usermem; extern struct sysctl_oid sysctl__hw_pagesize; +extern struct sysctl_oid sysctl__hw_pms; extern struct sysctl_oid 
sysctl__hw_epoch; extern struct sysctl_oid sysctl__hw_vectorunit; extern struct sysctl_oid sysctl__hw_busfrequency; @@ -84,6 +87,12 @@ extern struct sysctl_oid sysctl__hw_l2cachesize_compat; extern struct sysctl_oid sysctl__hw_l3cachesize_compat; extern struct sysctl_oid sysctl__hw_tbfrequency_compat; +#if __i386__ +extern struct sysctl_oid sysctl__sysctl_proc_exec_affinity; +#endif +extern struct sysctl_oid sysctl__sysctl_proc_cputype; +extern struct sysctl_oid sysctl__sysctl_proc_native; + extern struct sysctl_oid sysctl__kern_sysv_shmmax; extern struct sysctl_oid sysctl__kern_sysv_shmmin; extern struct sysctl_oid sysctl__kern_sysv_shmmni; @@ -100,6 +109,10 @@ extern struct sysctl_oid sysctl__kern_sysv_semmnu; extern struct sysctl_oid sysctl__kern_sysv_semmsl; extern struct sysctl_oid sysctl__kern_sysv_semume; +extern struct sysctl_oid sysctl__kern_tfp_policy; +extern struct sysctl_oid sysctl__kern_tfp_read_group; +extern struct sysctl_oid sysctl__kern_tfp_rw_group; + extern struct sysctl_oid sysctl__kern_dummy; extern struct sysctl_oid sysctl__kern_ipc_maxsockbuf; extern struct sysctl_oid sysctl__kern_ipc_mbstat; @@ -113,7 +126,12 @@ extern struct sysctl_oid sysctl__kern_posix; extern struct sysctl_oid sysctl__kern_posix_sem; extern struct sysctl_oid sysctl__kern_posix_sem_max; extern struct sysctl_oid sysctl__kern_sugid_scripts; +extern struct sysctl_oid sysctl__kern_nbuf; +extern struct sysctl_oid sysctl__kern_maxnbuf; extern struct sysctl_oid sysctl__kern_always_do_fullfsync; +extern struct sysctl_oid sysctl__kern_exec; +extern struct sysctl_oid sysctl__kern_exec_archhandler; +extern struct sysctl_oid sysctl__kern_exec_archhandler_powerpc; extern struct sysctl_oid sysctl__net_inet_icmp_icmplim; extern struct sysctl_oid sysctl__net_inet_icmp_maskrepl; extern struct sysctl_oid sysctl__net_inet_icmp_timestamp; @@ -315,6 +333,7 @@ extern struct sysctl_oid sysctl__vfs_generic_noremotehang; extern struct sysctl_oid sysctl__kern_ipc; extern struct sysctl_oid 
sysctl__kern_sysv; +extern struct sysctl_oid sysctl__kern_tfp; extern struct sysctl_oid sysctl__net_inet; @@ -371,6 +390,7 @@ extern struct sysctl_oid sysctl__machdep; extern struct sysctl_oid sysctl__net; extern struct sysctl_oid sysctl__debug; extern struct sysctl_oid sysctl__vfs; +extern struct sysctl_oid sysctl__vm; extern struct sysctl_oid sysctl__sysctl; #if INET6 @@ -470,6 +490,7 @@ extern struct sysctl_oid sysctl__net_key_natt_keepalive_interval; extern struct sysctl_oid sysctl__net_key_pfkeystat; #endif +extern struct sysctl_oid sysctl__vm_shared_region_trace_level; struct sysctl_oid *newsysctl_list[] = { @@ -479,6 +500,7 @@ struct sysctl_oid *newsysctl_list[] = &sysctl__net, &sysctl__debug, &sysctl__vfs, + &sysctl__vm, &sysctl__sysctl, &sysctl__debug_bpf_bufsize, &sysctl__debug_bpf_maxbufsize, @@ -498,6 +520,12 @@ struct sysctl_oid *newsysctl_list[] = ,&sysctl__kern_posix_sem ,&sysctl__kern_posix_sem_max +#if __i386__ + ,&sysctl__sysctl_proc_exec_affinity +#endif + ,&sysctl__sysctl_proc_cputype + ,&sysctl__sysctl_proc_native + ,&sysctl__kern_sysv_shmmax ,&sysctl__kern_sysv_shmmin ,&sysctl__kern_sysv_shmmni @@ -514,6 +542,11 @@ struct sysctl_oid *newsysctl_list[] = ,&sysctl__kern_sysv_semume ,&sysctl__kern_dummy + ,&sysctl__kern_tfp + ,&sysctl__kern_tfp_policy + ,&sysctl__kern_tfp_read_group + ,&sysctl__kern_tfp_rw_group + ,&sysctl__kern_ipc_maxsockbuf ,&sysctl__kern_ipc_mbstat ,&sysctl__kern_ipc_nmbclusters @@ -525,6 +558,11 @@ struct sysctl_oid *newsysctl_list[] = ,&sysctl__kern_sugid_scripts ,&sysctl__kern_always_do_fullfsync + ,&sysctl__kern_exec + ,&sysctl__kern_exec_archhandler + ,&sysctl__kern_exec_archhandler_powerpc + ,&sysctl__kern_nbuf + ,&sysctl__kern_maxnbuf ,&sysctl__hw_machine ,&sysctl__hw_model @@ -537,6 +575,8 @@ struct sysctl_oid *newsysctl_list[] = ,&sysctl__hw_byteorder ,&sysctl__hw_cputype ,&sysctl__hw_cpusubtype + ,&sysctl__hw_cpufamily + ,&sysctl__hw_cacheconfig ,&sysctl__hw_physmem ,&sysctl__hw_usermem ,&sysctl__hw_pagesize @@ 
-561,6 +601,7 @@ struct sysctl_oid *newsysctl_list[] = ,&sysctl__hw_optional_floatingpoint ,&sysctl__hw_pagesize_compat + ,&sysctl__hw_pms ,&sysctl__hw_busfrequency_compat ,&sysctl__hw_cpufrequency_compat ,&sysctl__hw_cachelinesize_compat @@ -902,6 +943,9 @@ struct sysctl_oid *newsysctl_list[] = ,&sysctl__net_inet_ipsec_bypass ,&sysctl__net_inet_ipsec_esp_port #endif + + ,&sysctl__vm_shared_region_trace_level + ,(struct sysctl_oid *) 0 }; diff --git a/bsd/kern/sysv_msg.c b/bsd/kern/sysv_msg.c index 757edc883..9d34b4d7c 100644 --- a/bsd/kern/sysv_msg.c +++ b/bsd/kern/sysv_msg.c @@ -105,12 +105,10 @@ __private_extern__ void sysv_msg_lock_init( void ) { sysv_msg_subsys_lck_grp_attr = lck_grp_attr_alloc_init(); - lck_grp_attr_setstat(sysv_msg_subsys_lck_grp_attr); sysv_msg_subsys_lck_grp = lck_grp_alloc_init("sysv_msg_subsys_lock", sysv_msg_subsys_lck_grp_attr); sysv_msg_subsys_lck_attr = lck_attr_alloc_init(); - /* lck_attr_setdebug(sysv_msg_subsys_lck_attr); */ lck_mtx_init(&sysv_msg_subsys_mutex, sysv_msg_subsys_lck_grp, sysv_msg_subsys_lck_attr); } diff --git a/bsd/kern/sysv_sem.c b/bsd/kern/sysv_sem.c index 21333d1f0..199cdd161 100644 --- a/bsd/kern/sysv_sem.c +++ b/bsd/kern/sysv_sem.c @@ -122,12 +122,10 @@ sysv_sem_lock_init( void ) { sysv_sem_subsys_lck_grp_attr = lck_grp_attr_alloc_init(); - lck_grp_attr_setstat(sysv_sem_subsys_lck_grp_attr); - sysv_sem_subsys_lck_grp = lck_grp_alloc_init("sysv_shm_subsys_lock", sysv_sem_subsys_lck_grp_attr); + sysv_sem_subsys_lck_grp = lck_grp_alloc_init("sysv_sem_subsys_lock", sysv_sem_subsys_lck_grp_attr); sysv_sem_subsys_lck_attr = lck_attr_alloc_init(); - lck_attr_setdebug(sysv_sem_subsys_lck_attr); lck_mtx_init(&sysv_sem_subsys_mutex, sysv_sem_subsys_lck_grp, sysv_sem_subsys_lck_attr); } diff --git a/bsd/kern/sysv_shm.c b/bsd/kern/sysv_shm.c index c626909e0..09f12c6d5 100644 --- a/bsd/kern/sysv_shm.c +++ b/bsd/kern/sysv_shm.c @@ -829,12 +829,10 @@ sysv_shm_lock_init( void ) { sysv_shm_subsys_lck_grp_attr = 
lck_grp_attr_alloc_init(); - lck_grp_attr_setstat(sysv_shm_subsys_lck_grp_attr); sysv_shm_subsys_lck_grp = lck_grp_alloc_init("sysv_shm_subsys_lock", sysv_shm_subsys_lck_grp_attr); sysv_shm_subsys_lck_attr = lck_attr_alloc_init(); - /* lck_attr_setdebug(sysv_shm_subsys_lck_attr); */ lck_mtx_init(&sysv_shm_subsys_mutex, sysv_shm_subsys_lck_grp, sysv_shm_subsys_lck_attr); } @@ -847,14 +845,14 @@ sysctl_shminfo(__unused struct sysctl_oid *oidp, void *arg1, int error = 0; int sysctl_shminfo_ret = 0; - error = SYSCTL_OUT(req, arg1, sizeof(user_ssize_t)); + error = SYSCTL_OUT(req, arg1, sizeof(int64_t)); if (error || req->newptr == USER_ADDR_NULL) return(error); SYSV_SHM_SUBSYS_LOCK(); /* Set the values only if shared memory is not initialised */ if (!shm_inited) { - if ((error = SYSCTL_IN(req, arg1, sizeof(user_ssize_t))) + if ((error = SYSCTL_IN(req, arg1, sizeof(int64_t))) != 0) { sysctl_shminfo_ret = error; goto sysctl_shminfo_out; @@ -862,18 +860,18 @@ sysctl_shminfo(__unused struct sysctl_oid *oidp, void *arg1, if (arg1 == &shminfo.shmmax) { if (shminfo.shmmax & PAGE_MASK_64) { - shminfo.shmmax = (user_ssize_t)-1; + shminfo.shmmax = (int64_t)-1; sysctl_shminfo_ret = EINVAL; goto sysctl_shminfo_out; } } /* Initialize only when all values are set */ - if ((shminfo.shmmax != (user_ssize_t)-1) && - (shminfo.shmmin != (user_ssize_t)-1) && - (shminfo.shmmni != (user_ssize_t)-1) && - (shminfo.shmseg != (user_ssize_t)-1) && - (shminfo.shmall != (user_ssize_t)-1)) { + if ((shminfo.shmmax != (int64_t)-1) && + (shminfo.shmmin != (int64_t)-1) && + (shminfo.shmmni != (int64_t)-1) && + (shminfo.shmseg != (int64_t)-1) && + (shminfo.shmall != (int64_t)-1)) { shminit(NULL); } } diff --git a/bsd/kern/ubc_subr.c b/bsd/kern/ubc_subr.c index 2cbd62364..72a62b4d1 100644 --- a/bsd/kern/ubc_subr.c +++ b/bsd/kern/ubc_subr.c @@ -70,9 +70,9 @@ #endif /* DIAGNOSTIC */ int ubc_info_init_internal(struct vnode *vp, int withfsize, off_t filesize); -int ubc_umcallback(vnode_t, void *); +static int 
ubc_umcallback(vnode_t, void *); int ubc_isinuse_locked(vnode_t, int, int); -int ubc_msync_internal(vnode_t, off_t, off_t, off_t *, int, int *); +static int ubc_msync_internal(vnode_t, off_t, off_t, off_t *, int, int *); struct zone *ubc_info_zone; diff --git a/bsd/kern/uipc_domain.c b/bsd/kern/uipc_domain.c index 9be151def..2f63a9203 100644 --- a/bsd/kern/uipc_domain.c +++ b/bsd/kern/uipc_domain.c @@ -259,7 +259,6 @@ domaininit() * allocate lock group attribute and group for domain mutexes */ domain_proto_mtx_grp_attr = lck_grp_attr_alloc_init(); - lck_grp_attr_setdefault(domain_proto_mtx_grp_attr); domain_proto_mtx_grp = lck_grp_alloc_init("domain", domain_proto_mtx_grp_attr); @@ -267,7 +266,6 @@ domaininit() * allocate the lock attribute for per domain mutexes */ domain_proto_mtx_attr = lck_attr_alloc_init(); - lck_attr_setdefault(domain_proto_mtx_attr); if ((domain_proto_mtx = lck_mtx_alloc_init(domain_proto_mtx_grp, domain_proto_mtx_attr)) == NULL) { printf("domaininit: can't init domain mtx for domain list\n"); diff --git a/bsd/kern/uipc_mbuf.c b/bsd/kern/uipc_mbuf.c index ec648d675..c4fac31f3 100644 --- a/bsd/kern/uipc_mbuf.c +++ b/bsd/kern/uipc_mbuf.c @@ -145,7 +145,7 @@ static void mbuf_expand_thread(void); static int m_expand(int ); static caddr_t m_bigalloc(int ); static void m_bigfree(caddr_t , u_int , caddr_t ); -static struct mbuf * m_mbigget(struct mbuf *, int ); +__private_extern__ struct mbuf * m_mbigget(struct mbuf *, int ); void mbinit(void); static void m_range_check(void *addr); @@ -216,11 +216,9 @@ mbinit(void) nclpp = round_page_32(MCLBYTES) / MCLBYTES; /* see mbufgc() */ if (nclpp < 1) nclpp = 1; mbuf_mlock_grp_attr = lck_grp_attr_alloc_init(); - lck_grp_attr_setdefault(mbuf_mlock_grp_attr); mbuf_mlock_grp = lck_grp_alloc_init("mbuf", mbuf_mlock_grp_attr); mbuf_mlock_attr = lck_attr_alloc_init(); - lck_attr_setdefault(mbuf_mlock_attr); mbuf_mlock = lck_mtx_alloc_init(mbuf_mlock_grp, mbuf_mlock_attr); diff --git a/bsd/kern/uipc_socket.c 
b/bsd/kern/uipc_socket.c index cba1490ae..0422f1317 100644 --- a/bsd/kern/uipc_socket.c +++ b/bsd/kern/uipc_socket.c @@ -182,7 +182,6 @@ void socketinit() * allocate lock group attribute and group for socket cache mutex */ so_cache_mtx_grp_attr = lck_grp_attr_alloc_init(); - lck_grp_attr_setdefault(so_cache_mtx_grp_attr); so_cache_mtx_grp = lck_grp_alloc_init("so_cache", so_cache_mtx_grp_attr); @@ -190,7 +189,6 @@ void socketinit() * allocate the lock attribute for socket cache mutex */ so_cache_mtx_attr = lck_attr_alloc_init(); - lck_attr_setdefault(so_cache_mtx_attr); so_cache_init_done = 1; @@ -465,6 +463,9 @@ socreate(dom, aso, type, proto) so->so_rcv.sb_flags |= SB_RECV; /* XXX */ so->so_rcv.sb_so = so->so_snd.sb_so = so; #endif + so->next_lock_lr = 0; + so->next_unlock_lr = 0; + //### Attachement will create the per pcb lock if necessary and increase refcount so->so_usecount++; /* for creation, make sure it's done before socket is inserted in lists */ @@ -967,13 +968,16 @@ soconnect2(so1, so2) struct socket *so2; { int error; -//####### Assumes so1 is already locked / - socket_lock(so2, 1); + socket_lock(so1, 1); + if (so2->so_proto->pr_lock) + socket_lock(so2, 1); error = (*so1->so_proto->pr_usrreqs->pru_connect2)(so1, so2); - socket_unlock(so2, 1); + socket_unlock(so1, 1); + if (so2->so_proto->pr_lock) + socket_unlock(so2, 1); return (error); } @@ -2011,7 +2015,7 @@ static int sodelayed_copy(struct socket *so, struct uio *uio, struct mbuf **free int soshutdown(so, how) register struct socket *so; - register int how; + int how; { register struct protosw *pr = so->so_proto; int ret; @@ -2846,11 +2850,9 @@ socket_lock(so, refcount) struct socket *so; int refcount; { - int error = 0, lr, lr_saved; -#ifdef __ppc__ - __asm__ volatile("mflr %0" : "=r" (lr)); - lr_saved = lr; -#endif + int error = 0, lr_saved; + + lr_saved = (unsigned int) __builtin_return_address(0); if (so->so_proto->pr_lock) { error = (*so->so_proto->pr_lock)(so, refcount, lr_saved); @@ -2862,7 
+2864,8 @@ socket_lock(so, refcount) lck_mtx_lock(so->so_proto->pr_domain->dom_mtx); if (refcount) so->so_usecount++; - so->reserved3 = (void*)lr_saved; /* save caller for refcount going to zero */ + so->lock_lr[so->next_lock_lr] = (void *)lr_saved; + so->next_lock_lr = (so->next_lock_lr+1) % SO_LCKDBG_MAX; } return(error); @@ -2874,15 +2877,10 @@ socket_unlock(so, refcount) struct socket *so; int refcount; { - int error = 0, lr, lr_saved; + int error = 0, lr_saved; lck_mtx_t * mutex_held; -#ifdef __ppc__ -__asm__ volatile("mflr %0" : "=r" (lr)); - lr_saved = lr; -#endif - - + lr_saved = (unsigned int) __builtin_return_address(0); if (so->so_proto == NULL) panic("socket_unlock null so_proto so=%x\n", so); @@ -2894,6 +2892,9 @@ __asm__ volatile("mflr %0" : "=r" (lr)); #ifdef MORE_LOCKING_DEBUG lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED); #endif + so->unlock_lr[so->next_unlock_lr] = (void *)lr_saved; + so->next_unlock_lr = (so->next_unlock_lr+1) % SO_LCKDBG_MAX; + if (refcount) { if (so->so_usecount <= 0) panic("socket_unlock: bad refcount so=%x value=%d\n", so, so->so_usecount); @@ -2901,8 +2902,6 @@ __asm__ volatile("mflr %0" : "=r" (lr)); if (so->so_usecount == 0) { sofreelastref(so, 1); } - else - so->reserved4 = (void*)lr_saved; /* save caller */ } lck_mtx_unlock(mutex_held); } @@ -2915,12 +2914,7 @@ sofree(so) struct socket *so; { - int lr, lr_saved; lck_mtx_t * mutex_held; -#ifdef __ppc__ - __asm__ volatile("mflr %0" : "=r" (lr)); - lr_saved = lr; -#endif if (so->so_proto->pr_getlock != NULL) mutex_held = (*so->so_proto->pr_getlock)(so, 0); else diff --git a/bsd/kern/uipc_socket2.c b/bsd/kern/uipc_socket2.c index ff81b933e..5d850a6ab 100644 --- a/bsd/kern/uipc_socket2.c +++ b/bsd/kern/uipc_socket2.c @@ -81,6 +81,7 @@ #define DBG_FNC_SBDROP NETDBG_CODE(DBG_NETSOCK, 4) #define DBG_FNC_SBAPPEND NETDBG_CODE(DBG_NETSOCK, 5) +static int sbcompress(struct sockbuf *, struct mbuf *, struct mbuf *); /* * Primitive routines for operating on sockets and socket 
buffers @@ -287,6 +288,8 @@ sonewconn_internal(head, connstatus) so->so_pgid = head->so_pgid; so->so_uid = head->so_uid; so->so_usecount = 1; + so->next_lock_lr = 0; + so->next_unlock_lr = 0; #ifdef __APPLE__ so->so_rcv.sb_flags |= SB_RECV; /* XXX */ @@ -329,8 +332,8 @@ sonewconn_internal(head, connstatus) head->so_qlen++; #ifdef __APPLE__ - /* Attach socket filters for this protocol */ - sflt_initsock(so); + /* Attach socket filters for this protocol */ + sflt_initsock(so); #endif if (connstatus) { so->so_state |= connstatus; @@ -411,17 +414,13 @@ int sbwait(sb) struct sockbuf *sb; { - int error = 0, lr, lr_saved; + int error = 0, lr_saved; struct socket *so = sb->sb_so; lck_mtx_t *mutex_held; struct timespec ts; -#ifdef __ppc__ - __asm__ volatile("mflr %0" : "=r" (lr)); - lr_saved = lr; -#endif + lr_saved = (unsigned int) __builtin_return_address(0); - if (so->so_proto->pr_getlock != NULL) mutex_held = (*so->so_proto->pr_getlock)(so, 0); else @@ -459,12 +458,7 @@ sb_lock(sb) { struct socket *so = sb->sb_so; lck_mtx_t * mutex_held; - int error = 0, lr, lr_saved; - -#ifdef __ppc__ - __asm__ volatile("mflr %0" : "=r" (lr)); - lr_saved = lr; -#endif + int error = 0; if (so == NULL) panic("sb_lock: null so back pointer sb=%x\n", sb); @@ -477,6 +471,7 @@ sb_lock(sb) mutex_held = so->so_proto->pr_domain->dom_mtx; if (so->so_usecount < 1) panic("sb_lock: so=%x refcount=%d\n", so, so->so_usecount); + error = msleep((caddr_t)&sb->sb_flags, mutex_held, (sb->sb_flags & SB_NOINTR) ? 
PSOCK : PSOCK | PCATCH, "sblock", 0); if (so->so_usecount < 1) @@ -742,7 +737,7 @@ sbcheck(sb) int sbappendrecord(sb, m0) register struct sockbuf *sb; - register struct mbuf *m0; + struct mbuf *m0; { register struct mbuf *m; int result = 0; @@ -1466,13 +1461,12 @@ void sbunlock(struct sockbuf *sb, int keeplocked) { struct socket *so = sb->sb_so; - int lr, lr_saved; + int lr_saved; lck_mtx_t *mutex_held; -#ifdef __ppc__ - __asm__ volatile("mflr %0" : "=r" (lr)); - lr_saved = lr; -#endif + + lr_saved = (unsigned int) __builtin_return_address(0); + sb->sb_flags &= ~SB_LOCK; if (so->so_proto->pr_getlock != NULL) @@ -1494,7 +1488,8 @@ sbunlock(struct sockbuf *sb, int keeplocked) so->so_usecount--; if (so->so_usecount < 0) panic("sbunlock: unlock on exit so=%x lr=%x sb_flags=%x\n", so, so->so_usecount,lr_saved, sb->sb_flags); - so->reserved4= lr_saved; + so->unlock_lr[so->next_unlock_lr] = (void *)lr_saved; + so->next_unlock_lr = (so->next_unlock_lr+1) % SO_LCKDBG_MAX; lck_mtx_unlock(mutex_held); } } diff --git a/bsd/machine/_limits.h b/bsd/machine/_limits.h index a5be9109a..b200f635b 100644 --- a/bsd/machine/_limits.h +++ b/bsd/machine/_limits.h @@ -24,7 +24,7 @@ #if defined (__ppc__) || defined (__ppc64__) #include "ppc/_limits.h" -#elif defined (__i386__) +#elif defined (__i386__) || defined(__x86_64__) #include "i386/_limits.h" #else #error architecture not supported diff --git a/bsd/machine/_types.h b/bsd/machine/_types.h index 8e6333b6f..0f2de3369 100644 --- a/bsd/machine/_types.h +++ b/bsd/machine/_types.h @@ -24,7 +24,7 @@ #if defined (__ppc__) || defined (__ppc64__) #include "ppc/_types.h" -#elif defined (__i386__) +#elif defined (__i386__) || defined(__x86_64__) #include "i386/_types.h" #else #error architecture not supported diff --git a/bsd/machine/cons.h b/bsd/machine/cons.h index 6d4b3d7cc..39f932b5e 100644 --- a/bsd/machine/cons.h +++ b/bsd/machine/cons.h @@ -25,7 +25,7 @@ #if defined (__ppc__) || defined (__ppc64__) #include -#elif defined (__i386__) 
+#elif defined (__i386__) || defined(__x86_64__) #include #else #error architecture not supported diff --git a/bsd/machine/disklabel.h b/bsd/machine/disklabel.h index 8d1402213..632e5e92e 100644 --- a/bsd/machine/disklabel.h +++ b/bsd/machine/disklabel.h @@ -25,7 +25,7 @@ #if defined (__ppc__) || defined (__ppc64__) #include "ppc/disklabel.h" -#elif defined (__i386__) +#elif defined (__i386__) || defined(__x86_64__) #include "i386/disklabel.h" #else #error architecture not supported diff --git a/bsd/machine/endian.h b/bsd/machine/endian.h index a6f870e5b..13899419a 100644 --- a/bsd/machine/endian.h +++ b/bsd/machine/endian.h @@ -28,7 +28,7 @@ #if defined (__ppc__) || defined(__ppc64__) #include "ppc/endian.h" -#elif defined (__i386__) +#elif defined (__i386__) || defined(__x86_64__) #include "i386/endian.h" #else #error architecture not supported diff --git a/bsd/machine/exec.h b/bsd/machine/exec.h index cb3306c73..d168d5969 100644 --- a/bsd/machine/exec.h +++ b/bsd/machine/exec.h @@ -25,10 +25,21 @@ #ifndef _BSD_MACHINE_EXEC_H_ #define _BSD_MACHINE_EXEC_H_ +#include + +struct exec_archhandler { + char path[MAXPATHLEN]; + uint32_t fsid; + long fileid; +}; + +extern struct exec_archhandler exec_archhandler_ppc; +extern int set_archhandler(struct proc *p, int arch); +extern int grade_binary(cpu_type_t exectype, cpu_subtype_t execsubtype); #if defined (__ppc__) || defined (__ppc64__) #include "ppc/exec.h" -#elif defined (__i386__) +#elif defined (__i386__) || defined(__x86_64__) #include "i386/exec.h" #else #error architecture not supported diff --git a/bsd/machine/param.h b/bsd/machine/param.h index ab305ba73..b8994c955 100644 --- a/bsd/machine/param.h +++ b/bsd/machine/param.h @@ -28,7 +28,7 @@ #if defined (__ppc__) || defined (__ppc64__) #include "ppc/param.h" -#elif defined (__i386__) +#elif defined (__i386__) || defined(__x86_64__) #include "i386/param.h" #else #error architecture not supported diff --git a/bsd/machine/profile.h b/bsd/machine/profile.h index 
847570beb..b84a10bc7 100644 --- a/bsd/machine/profile.h +++ b/bsd/machine/profile.h @@ -32,7 +32,7 @@ #if defined (__ppc__) || defined (__ppc64__) #include "ppc/profile.h" -#elif defined (__i386__) +#elif defined (__i386__) || defined(__x86_64__) #include "i386/profile.h" #else #error architecture not supported diff --git a/bsd/machine/psl.h b/bsd/machine/psl.h index 06c76e528..aa74ef173 100644 --- a/bsd/machine/psl.h +++ b/bsd/machine/psl.h @@ -25,7 +25,7 @@ #if defined (__ppc__) || defined (__ppc64__) #include "ppc/psl.h" -#elif defined (__i386__) +#elif defined (__i386__) || defined(__x86_64__) #include "i386/psl.h" #else #error architecture not supported diff --git a/bsd/machine/ptrace.h b/bsd/machine/ptrace.h index 8d14243a6..1c0cde0be 100644 --- a/bsd/machine/ptrace.h +++ b/bsd/machine/ptrace.h @@ -28,7 +28,7 @@ #if defined (__ppc__) || defined(__ppc64__) #include "ppc/ptrace.h" -#elif defined (__i386__) +#elif defined (__i386__) || defined(__x86_64__) #include "i386/ptrace.h" #else #error architecture not supported diff --git a/bsd/machine/reboot.h b/bsd/machine/reboot.h index 7d0af116c..368be9e2a 100644 --- a/bsd/machine/reboot.h +++ b/bsd/machine/reboot.h @@ -25,7 +25,7 @@ #if defined (__ppc__) || defined (__ppc64__) #include "ppc/reboot.h" -#elif defined (__i386__) +#elif defined (__i386__) || defined(__x86_64__) #include "i386/reboot.h" #else #error architecture not supported diff --git a/bsd/machine/reg.h b/bsd/machine/reg.h index 7e18c5b53..e8166db67 100644 --- a/bsd/machine/reg.h +++ b/bsd/machine/reg.h @@ -25,7 +25,7 @@ #if defined (__ppc__) || defined (__ppc64__) #include "ppc/reg.h" -#elif defined (__i386__) +#elif defined (__i386__) || defined(__x86_64__) #include "i386/reg.h" #else #error architecture not supported diff --git a/bsd/machine/setjmp.h b/bsd/machine/setjmp.h index c4bbf5dec..79b088b82 100644 --- a/bsd/machine/setjmp.h +++ b/bsd/machine/setjmp.h @@ -27,7 +27,7 @@ #if defined (__ppc__) || defined (__ppc64__) #include "ppc/setjmp.h" 
-#elif defined (__i386__) +#elif defined (__i386__) || defined(__x86_64__) #include "i386/setjmp.h" #else #error architecture not supported diff --git a/bsd/machine/signal.h b/bsd/machine/signal.h index 6c926665e..34a5e06cd 100644 --- a/bsd/machine/signal.h +++ b/bsd/machine/signal.h @@ -25,7 +25,7 @@ #if defined (__ppc__) || defined (__ppc64__) #include "ppc/signal.h" -#elif defined (__i386__) +#elif defined (__i386__) || defined(__x86_64__) #include "i386/signal.h" #else #error architecture not supported diff --git a/bsd/machine/types.h b/bsd/machine/types.h index 12053c52f..b5c580f24 100644 --- a/bsd/machine/types.h +++ b/bsd/machine/types.h @@ -28,7 +28,7 @@ #if defined (__ppc__) || defined (__ppc64__) #include "ppc/types.h" -#elif defined (__i386__) +#elif defined (__i386__) || defined(__x86_64__) #include "i386/types.h" #else #error architecture not supported diff --git a/bsd/machine/ucontext.h b/bsd/machine/ucontext.h index fa9635508..b5fdd1ad5 100644 --- a/bsd/machine/ucontext.h +++ b/bsd/machine/ucontext.h @@ -24,7 +24,7 @@ #if defined (__ppc__) || defined (__ppc64__) #include "ppc/ucontext.h" -#elif defined (__i386__) +#elif defined (__i386__) || defined(__x86_64__) #include "i386/ucontext.h" #else #error architecture not supported diff --git a/bsd/machine/vmparam.h b/bsd/machine/vmparam.h index ab232e8bb..f084c03bd 100644 --- a/bsd/machine/vmparam.h +++ b/bsd/machine/vmparam.h @@ -25,7 +25,7 @@ #if defined (__ppc__) || defined (__ppc64__) #include "ppc/vmparam.h" -#elif defined (__i386__) +#elif defined (__i386__) || defined(__x86_64__) #include "i386/vmparam.h" #else #error architecture not supported diff --git a/bsd/man/man2/Makefile b/bsd/man/man2/Makefile index 3473b0190..7312eba32 100644 --- a/bsd/man/man2/Makefile +++ b/bsd/man/man2/Makefile @@ -69,6 +69,7 @@ DATAFILES = \ gettimeofday.2 \ getuid.2 \ getxattr.2 \ + i386_get_ldt.2 \ intro.2 \ ioctl.2 \ issetugid.2 \ @@ -174,7 +175,7 @@ DATAFILES = \ # List of source/target hard link pairs for 
installed manual pages; source # names may be repeated -MLINKS= kqueue.2 kevent.2 +MLINKS= kqueue.2 kevent.2 i386_get_ldt.2 i386_set_ldt.2 INSTALL_MAN_LIST = ${DATAFILES} INSTALL_MAN_LINKS = ${MLINKS} diff --git a/bsd/man/man2/i386_get_ldt.2 b/bsd/man/man2/i386_get_ldt.2 new file mode 100644 index 000000000..40ae07566 --- /dev/null +++ b/bsd/man/man2/i386_get_ldt.2 @@ -0,0 +1,143 @@ +.\" Copyright (c) 1980, 1991 Regents of the University of California. +.\" All rights reserved. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" 3. All advertising materials mentioning features or use of this software +.\" must display the following acknowledgement: +.\" This product includes software developed by the University of +.\" California, Berkeley and its contributors. +.\" 4. Neither the name of the University nor the names of its contributors +.\" may be used to endorse or promote products derived from this software +.\" without specific prior written permission. +.\" +.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND +.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE +.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +.\" SUCH DAMAGE. +.\" +.\" from: @(#)fork.2 6.5 (Berkeley) 3/10/91 +.\" $FreeBSD: /repoman/r/ncvs/src/lib/libc/i386/sys/i386_get_ldt.2,v 1.21 2004/07/02 19:07:30 ru Exp $ +.\" +.Dd September 20, 1993 +.Dt I386_GET_LDT 2 +.Os +.Sh NAME +.Nm i386_get_ldt , +.Nm i386_set_ldt +.Nd manage i386 per-process Local Descriptor Table entries +.Sh LIBRARY +.Lb libc +.Sh SYNOPSIS +.In architecture/i386/table.h +.In i386/user_ldt.h +.Ft int +.Fn i386_get_ldt "int start_sel" "union ldt_entry *descs" "int num_sels" +.Ft int +.Fn i386_set_ldt "int start_sel" "union ldt_entry *descs" "int num_sels" +.Sh DESCRIPTION +The +.Fn i386_get_ldt +system call +will return the list of i386 descriptors that the process has in its +LDT. +The +.Fn i386_set_ldt +system call +will set a list of i386 descriptors for the current process in its +LDT. +Both routines accept a starting selector number +.Fa start_sel , +an array of memory that +will contain the descriptors to be set or returned +.Fa descs , +and the number of entries to set or return +.Fa num_sels . +.Pp +The argument +.Fa descs +can be either code_desc_t, data_desc_t or call_gate_t and are defined in +.In architecture/i386/desc.h . +These structures are defined by the architecture +as disjoint bit-fields, so care must be taken in constructing them. 
+.Pp +If +.Fa start_sel +is +.Em LDT_AUTO_ALLOC , +.Fa num_sels +is 1 and the descriptor pointed to by +.Fa descs +is legal, then +.Fn i386_set_ldt +will allocate a descriptor and return its +selector number. +.Pp +If +.Fa num_sels +is 1, +.Fa start_sel +is valid, and +.Fa descs +is NULL, then +.Fn i386_set_ldt +will free that descriptor +(making it available to be reallocated again later). +.Pp +If +.Fa num_sels +is 0, +.Fa start_sel +is 0 and +.Fa descs +is NULL then, as a special case, +.Fn i386_set_ldt +will free all descriptors. +.Sh RETURN VALUES +Upon successful completion, +.Fn i386_get_ldt +returns the number of descriptors currently in the LDT. +The +.Fn i386_set_ldt +system call +returns the first selector set. +In the case when a descriptor is allocated by the kernel, its number will +be returned. +Otherwise, a value of -1 is returned and the global +variable +.Va errno +is set to indicate the error. +.Sh ERRORS +The +.Fn i386_get_ldt +and +.Fn i386_set_ldt +system calls +will fail if: +.Bl -tag -width Er +.It Bq Er EINVAL +An inappropriate value was used for +.Fa start_sel +or +.Fa num_sels . +.It Bq Er EACCES +The caller attempted to use a descriptor that would +circumvent protection or cause a failure. +.El +.Sh SEE ALSO +i386 Microprocessor Programmer's Reference Manual, Intel +.Sh WARNING +You can really hose your process using this. diff --git a/bsd/man/man2/minherit.2 b/bsd/man/man2/minherit.2 index 5943caf37..01e4ffbbd 100644 --- a/bsd/man/man2/minherit.2 +++ b/bsd/man/man2/minherit.2 @@ -1,4 +1,4 @@ -.\" $Id: minherit.2,v 1.2 2002/03/13 00:18:19 lindak Exp $ +.\" $Id: minherit.2,v 1.2.2684.1 2005/06/24 01:47:10 lindak Exp $ .\" .\" Copyright (c) 1991, 1993 .\" The Regents of the University of California. All rights reserved. 
diff --git a/bsd/miscfs/devfs/devfs_tree.c b/bsd/miscfs/devfs/devfs_tree.c index ff61c3d5a..ffecc22d2 100644 --- a/bsd/miscfs/devfs/devfs_tree.c +++ b/bsd/miscfs/devfs/devfs_tree.c @@ -127,14 +127,12 @@ static int devfs_ready = 0; int devfs_sinit(void) { - int error; + int error; - devfs_lck_grp_attr = lck_grp_attr_alloc_init(); - lck_grp_attr_setstat(devfs_lck_grp_attr); + devfs_lck_grp_attr = lck_grp_attr_alloc_init(); devfs_lck_grp = lck_grp_alloc_init("devfs_lock", devfs_lck_grp_attr); devfs_lck_attr = lck_attr_alloc_init(); - //lck_attr_setdebug(devfs_lck_attr); lck_mtx_init(&devfs_mutex, devfs_lck_grp, devfs_lck_attr); diff --git a/bsd/miscfs/devfs/devfs_vfsops.c b/bsd/miscfs/devfs/devfs_vfsops.c index c2148de5d..a1ed0909d 100644 --- a/bsd/miscfs/devfs/devfs_vfsops.c +++ b/bsd/miscfs/devfs/devfs_vfsops.c @@ -71,6 +71,7 @@ static int devfs_statfs( struct mount *mp, struct vfsstatfs *sbp, vfs_context_t static int devfs_vfs_getattr(mount_t mp, struct vfs_attr *fsap, vfs_context_t context); static struct vfstable * devfs_vfsp = 0; +extern int setup_kmem; /*- @@ -92,10 +93,9 @@ devfs_init(struct vfsconf *vfsp) UID_ROOT, GID_WHEEL, 0622, "console"); devfs_make_node(makedev(2, 0), DEVFS_CHAR, UID_ROOT, GID_WHEEL, 0666, "tty"); - devfs_make_node(makedev(3, 0), DEVFS_CHAR, - UID_ROOT, GID_KMEM, 0640, "mem"); - devfs_make_node(makedev(3, 1), DEVFS_CHAR, - UID_ROOT, GID_KMEM, 0640, "kmem"); + if (setup_kmem) { + devfs_setup_kmem(); + } devfs_make_node(makedev(3, 2), DEVFS_CHAR, UID_ROOT, GID_WHEEL, 0666, "null"); devfs_make_node(makedev(3, 3), DEVFS_CHAR, @@ -105,6 +105,16 @@ devfs_init(struct vfsconf *vfsp) return 0; } +__private_extern__ void +devfs_setup_kmem(void) +{ + devfs_make_node(makedev(3, 0), DEVFS_CHAR, + UID_ROOT, GID_KMEM, 0640, "mem"); + devfs_make_node(makedev(3, 1), DEVFS_CHAR, + UID_ROOT, GID_KMEM, 0640, "kmem"); +} + + /*- * mp - pointer to 'mount' structure * path - addr in user space of mount point (ie /usr or whatever) diff --git 
a/bsd/miscfs/devfs/devfs_vnops.c b/bsd/miscfs/devfs/devfs_vnops.c index c74d145f2..40f719892 100644 --- a/bsd/miscfs/devfs/devfs_vnops.c +++ b/bsd/miscfs/devfs/devfs_vnops.c @@ -1336,7 +1336,6 @@ static struct vnodeopv_entry_desc devfs_spec_vnodeop_entries[] = { { &vnop_pathconf_desc, (VOPFUNC)spec_pathconf }, /* pathconf */ { &vnop_advlock_desc, (VOPFUNC)spec_advlock }, /* advlock */ { &vnop_bwrite_desc, (VOPFUNC)vn_bwrite }, - { &vnop_devblocksize_desc, (VOPFUNC)spec_devblocksize }, /* devblocksize */ { &vnop_pagein_desc, (VOPFUNC)err_pagein }, /* Pagein */ { &vnop_pageout_desc, (VOPFUNC)err_pageout }, /* Pageout */ { &vnop_copyfile_desc, (VOPFUNC)err_copyfile }, /* Copyfile */ diff --git a/bsd/miscfs/specfs/spec_vnops.c b/bsd/miscfs/specfs/spec_vnops.c index 7cb75e4cd..cf66f74c0 100644 --- a/bsd/miscfs/specfs/spec_vnops.c +++ b/bsd/miscfs/specfs/spec_vnops.c @@ -123,7 +123,6 @@ struct vnodeopv_entry_desc spec_vnodeop_entries[] = { { &vnop_pathconf_desc, (VOPFUNC)spec_pathconf }, /* pathconf */ { &vnop_advlock_desc, (VOPFUNC)err_advlock }, /* advlock */ { &vnop_bwrite_desc, (VOPFUNC)spec_bwrite }, /* bwrite */ - { &vnop_devblocksize_desc, (VOPFUNC)spec_devblocksize }, /* devblocksize */ { &vnop_pagein_desc, (VOPFUNC)err_pagein }, /* Pagein */ { &vnop_pageout_desc, (VOPFUNC)err_pageout }, /* Pageout */ { &vnop_copyfile_desc, (VOPFUNC)err_copyfile }, /* Copyfile */ @@ -854,17 +853,6 @@ spec_pathconf(ap) /* NOTREACHED */ } -int -spec_devblocksize(ap) - struct vnop_devblocksize_args /* { - struct vnode *a_vp; - int *a_retval; - } */ *ap; -{ - *ap->a_retval = (ap->a_vp->v_specsize); - return (0); -} - /* * Special device failed operation */ diff --git a/bsd/miscfs/specfs/specdev.h b/bsd/miscfs/specfs/specdev.h index 61c340ac1..f40247957 100644 --- a/bsd/miscfs/specfs/specdev.h +++ b/bsd/miscfs/specfs/specdev.h @@ -154,7 +154,6 @@ int spec_pathconf (struct vnop_pathconf_args *); #define spec_valloc (int (*) (struct vnop_access_args *))err_valloc #define spec_vfree (int 
(*) (struct vnop_access_args *))err_vfree #define spec_bwrite (int (*) (struct vnop_bwrite_args *))nop_bwrite -int spec_devblocksize (struct vnop_devblocksize_args *); int spec_blktooff (struct vnop_blktooff_args *); int spec_offtoblk (struct vnop_offtoblk_args *); int spec_blockmap (struct vnop_blockmap_args *); diff --git a/bsd/miscfs/synthfs/synthfs_vnops.c b/bsd/miscfs/synthfs/synthfs_vnops.c index 4f1110e77..c6367ec4d 100644 --- a/bsd/miscfs/synthfs/synthfs_vnops.c +++ b/bsd/miscfs/synthfs/synthfs_vnops.c @@ -105,7 +105,6 @@ struct vnodeopv_entry_desc synthfs_vnodeop_entries[] = { {&vnop_allocate_desc, (VOPFUNC)err_allocate}, /* allocate - not supported */ {&vnop_pagein_desc, (VOPFUNC)err_pagein}, /* pagein - not supported */ {&vnop_pageout_desc, (VOPFUNC)err_pageout}, /* pageout - not supported */ - {&vnop_devblocksize_desc, (VOPFUNC)err_devblocksize}, /* devblocksize - not supported */ {&vnop_searchfs_desc, (VOPFUNC)err_searchfs}, /* searchfs - not supported */ {&vnop_copyfile_desc, (VOPFUNC)err_copyfile}, /* copyfile - not supported */ { &vnop_blktooff_desc, (VOPFUNC)err_blktooff }, /* blktooff not supported */ diff --git a/bsd/miscfs/union/union.h b/bsd/miscfs/union/union.h index 475a6f7dd..9c64c4c7d 100644 --- a/bsd/miscfs/union/union.h +++ b/bsd/miscfs/union/union.h @@ -90,19 +90,12 @@ struct union_mount { */ /* LP64todo - should this move? */ -#if __DARWIN_ALIGN_NATURAL -#pragma options align=natural -#endif - struct user_union_args { user_addr_t target; /* Target of loopback */ int mntflags; /* Options on the mount */ + char _pad[4]; }; -#if __DARWIN_ALIGN_NATURAL -#pragma options align=reset -#endif - /* * DEFDIRMODE is the mode bits used to create a shadow directory. 
*/ diff --git a/bsd/miscfs/volfs/volfs_vnops.c b/bsd/miscfs/volfs/volfs_vnops.c index d875957b1..82a1bd6ad 100644 --- a/bsd/miscfs/volfs/volfs_vnops.c +++ b/bsd/miscfs/volfs/volfs_vnops.c @@ -141,7 +141,6 @@ struct vnodeopv_entry_desc volfs_vnodeop_entries[] = { {&vnop_allocate_desc, (VOPFUNC)err_allocate}, /* allocate */ {&vnop_pagein_desc, (VOPFUNC)err_pagein}, /* pagein */ {&vnop_pageout_desc, (VOPFUNC)err_pageout}, /* pageout */ - {&vnop_devblocksize_desc, (VOPFUNC)err_devblocksize}, /* devblocksize */ {&vnop_searchfs_desc, (VOPFUNC)err_searchfs}, /* searchfs */ {&vnop_copyfile_desc, (VOPFUNC)err_copyfile }, /* Copyfile */ {&vnop_blktooff_desc, (VOPFUNC)err_blktooff}, /* blktooff */ diff --git a/bsd/net/bpf.c b/bsd/net/bpf.c index 3d025ccbc..5581b6ea2 100644 --- a/bsd/net/bpf.c +++ b/bsd/net/bpf.c @@ -1611,12 +1611,10 @@ bpf_init(__unused void *unused) bpf_devsw_installed = 1; bpf_mlock_grp_attr = lck_grp_attr_alloc_init(); - lck_grp_attr_setdefault(bpf_mlock_grp_attr); bpf_mlock_grp = lck_grp_alloc_init("bpf", bpf_mlock_grp_attr); bpf_mlock_attr = lck_attr_alloc_init(); - lck_attr_setdefault(bpf_mlock_attr); bpf_mlock = lck_mtx_alloc_init(bpf_mlock_grp, bpf_mlock_attr); diff --git a/bsd/net/bpf.h b/bsd/net/bpf.h index b6b0a3995..e20ed1a91 100644 --- a/bsd/net/bpf.h +++ b/bsd/net/bpf.h @@ -103,19 +103,11 @@ struct bpf_program { * grow when we're dealing with a 64-bit process. 
* WARNING - keep in sync with bpf_program */ -#if __DARWIN_ALIGN_NATURAL -#pragma options align=natural -#endif - struct user_bpf_program { u_int bf_len; - user_addr_t bf_insns; + user_addr_t bf_insns __attribute__((aligned(8))); }; -#if __DARWIN_ALIGN_NATURAL -#pragma options align=reset -#endif - #endif // KERNEL /* diff --git a/bsd/net/dlil.c b/bsd/net/dlil.c index 1118b6142..4b24b76f9 100644 --- a/bsd/net/dlil.c +++ b/bsd/net/dlil.c @@ -551,7 +551,6 @@ dlil_init(void) /* Setup the lock groups we will use */ grp_attributes = lck_grp_attr_alloc_init(); - lck_grp_attr_setdefault(grp_attributes); dlil_lock_group = lck_grp_alloc_init("dlil internal locks", grp_attributes); #if IFNET_RW_LOCK @@ -566,10 +565,8 @@ dlil_init(void) /* Setup the lock attributes we will use */ lck_attributes = lck_attr_alloc_init(); - lck_attr_setdefault(lck_attributes); ifnet_lock_attr = lck_attr_alloc_init(); - lck_attr_setdefault(ifnet_lock_attr); dlil_input_lock = lck_spin_alloc_init(input_lock_grp, lck_attributes); input_lock_grp = 0; diff --git a/bsd/net/ether_at_pr_module.c b/bsd/net/ether_at_pr_module.c index 7f031bdd8..1bf55ad72 100644 --- a/bsd/net/ether_at_pr_module.c +++ b/bsd/net/ether_at_pr_module.c @@ -185,7 +185,7 @@ ether_pre_output( eh = (struct ether_header *)dst_netaddr->sa_data; bcopy((caddr_t)eh->ether_dhost, (caddr_t)edst, 6); - *(u_short *)type = m->m_pkthdr.len; + *(u_short *)type = htons(m->m_pkthdr.len); } break; diff --git a/bsd/net/if_bond.c b/bsd/net/if_bond.c index 5c7005dc4..f530e874f 100644 --- a/bsd/net/if_bond.c +++ b/bsd/net/if_bond.c @@ -98,8 +98,6 @@ my_lck_grp_alloc_init(const char * grp_name) lck_grp_attr_t * grp_attrs; grp_attrs = lck_grp_attr_alloc_init(); - lck_grp_attr_setdefault(grp_attrs); - lck_grp_attr_setdefault(grp_attrs); grp = lck_grp_alloc_init(grp_name, grp_attrs); lck_grp_attr_free(grp_attrs); return (grp); @@ -112,7 +110,6 @@ my_lck_mtx_alloc_init(lck_grp_t * lck_grp) lck_mtx_t * lck_mtx; lck_attrs = lck_attr_alloc_init(); - 
lck_attr_setdefault(lck_attrs); lck_mtx = lck_mtx_alloc_init(lck_grp, lck_attrs); lck_attr_free(lck_attrs); return (lck_mtx); diff --git a/bsd/net/if_var.h b/bsd/net/if_var.h index 8ac2039fb..6feecc2b9 100644 --- a/bsd/net/if_var.h +++ b/bsd/net/if_var.h @@ -85,7 +85,7 @@ #define APPLE_IF_FAM_STF 12 #define APPLE_IF_FAM_FIREWIRE 13 #define APPLE_IF_FAM_BOND 14 -#endif __APPLE__ +#endif /* __APPLE__ */ /* * 72 was chosen below because it is the size of a TCP/IP diff --git a/bsd/net/if_vlan.c b/bsd/net/if_vlan.c index 21e88cddd..a9d91c055 100644 --- a/bsd/net/if_vlan.c +++ b/bsd/net/if_vlan.c @@ -113,7 +113,6 @@ my_lck_grp_alloc_init(const char * grp_name) lck_grp_attr_t * grp_attrs; grp_attrs = lck_grp_attr_alloc_init(); - lck_grp_attr_setdefault(grp_attrs); grp = lck_grp_alloc_init(grp_name, grp_attrs); lck_grp_attr_free(grp_attrs); return (grp); @@ -126,7 +125,6 @@ my_lck_mtx_alloc_init(lck_grp_t * lck_grp) lck_mtx_t * lck_mtx; lck_attrs = lck_attr_alloc_init(); - lck_attr_setdefault(lck_attrs); lck_mtx = lck_mtx_alloc_init(lck_grp, lck_attrs); lck_attr_free(lck_attrs); return (lck_mtx); diff --git a/bsd/net/kext_net.h b/bsd/net/kext_net.h index e56930a55..38e3eb120 100644 --- a/bsd/net/kext_net.h +++ b/bsd/net/kext_net.h @@ -98,9 +98,7 @@ int sflt_attach_private(struct socket *so, struct socket_filter *filter, sflt_h * the 'where' NKE. 
If the latter is NULL, the flags indicate "first" * or "last" */ -#if __DARWIN_ALIGN_POWER -#pragma options align=power -#endif +#pragma pack(4) struct so_nke { unsigned int nke_handle; @@ -109,9 +107,7 @@ struct so_nke unsigned long reserved[4]; /* for future use */ }; -#if __DARWIN_ALIGN_POWER -#pragma options align=reset -#endif +#pragma pack() #endif /* NET_KEXT_NET_H */ diff --git a/bsd/net/kpi_interface.c b/bsd/net/kpi_interface.c index 0ce0359ba..74143ae02 100644 --- a/bsd/net/kpi_interface.c +++ b/bsd/net/kpi_interface.c @@ -1024,7 +1024,10 @@ errno_t ifnet_get_multicast_list(ifnet_t interface, ifmultiaddr_t **addresses) } MALLOC(*addresses, ifmultiaddr_t*, sizeof(ifmultiaddr_t) * (cmax + 1), M_TEMP, M_NOWAIT); - if (*addresses == NULL) return ENOMEM; + if (*addresses == NULL) { + if (lock) ifnet_lock_done(interface); + return ENOMEM; + } LIST_FOREACH(addr, &interface->if_multiaddrs, ifma_link) { diff --git a/bsd/net/kpi_protocol.c b/bsd/net/kpi_protocol.c index f1611a11e..e33558dcc 100644 --- a/bsd/net/kpi_protocol.c +++ b/bsd/net/kpi_protocol.c @@ -89,11 +89,9 @@ proto_kpi_init(void) /* Allocate a mtx lock */ grp_attrib = lck_grp_attr_alloc_init(); - lck_grp_attr_setdefault(grp_attrib); lck_group = lck_grp_alloc_init("protocol kpi", grp_attrib); lck_grp_attr_free(grp_attrib); lck_attrib = lck_attr_alloc_init(); - lck_attr_setdefault(lck_attrib); proto_input_lock = lck_mtx_alloc_init(lck_group, lck_attrib); lck_grp_free(lck_group); lck_attr_free(lck_attrib); diff --git a/bsd/net/ndrv.c b/bsd/net/ndrv.c index ed973ec45..d5e7e80b3 100644 --- a/bsd/net/ndrv.c +++ b/bsd/net/ndrv.c @@ -289,7 +289,10 @@ ndrv_event(struct ifnet *ifp, struct kev_msg *event) event->kev_class == KEV_NETWORK_CLASS && event->kev_subclass == KEV_DL_SUBCLASS && event->event_code == KEV_DL_IF_DETACHING) { + lck_mtx_assert(ndrvdomain.dom_mtx, LCK_MTX_ASSERT_NOTOWNED); + lck_mtx_lock(ndrvdomain.dom_mtx); ndrv_handle_ifp_detach(ifp->if_family, ifp->if_unit); + 
lck_mtx_unlock(ndrvdomain.dom_mtx); } } @@ -358,7 +361,9 @@ ndrv_bind(struct socket *so, struct sockaddr *nam, __unused struct proc *p) ndrv_proto.event = ndrv_event; /* We aren't worried about double attaching, that should just return an error */ + socket_unlock(so, 0); result = dlil_attach_protocol(&ndrv_proto); + socket_lock(so, 0); if (result && result != EEXIST) { return result; } @@ -524,21 +529,24 @@ ndrv_do_detach(struct ndrv_cb *np) struct ndrv_cb* cur_np = NULL; struct socket *so = np->nd_socket; int error = 0; + struct ifnet * ifp; #if NDRV_DEBUG kprintf("NDRV detach: %x, %x\n", so, np); #endif ndrv_remove_all_multicast(np); - - if (np->nd_if) { - if (np->nd_proto_family != PF_NDRV && - np->nd_proto_family != 0) { - dlil_detach_protocol(np->nd_if, np->nd_proto_family); + ifp = np->nd_if; + /* Remove from the linked list of control blocks */ + TAILQ_REMOVE(&ndrvl, np, nd_next); + if (ifp != NULL) { + u_long proto_family = np->nd_proto_family; + + if (proto_family != PF_NDRV && proto_family != 0) { + socket_unlock(so, 0); + dlil_detach_protocol(ifp, proto_family); + socket_lock(so, 0); } - /* Remove from the linked list of control blocks */ - TAILQ_REMOVE(&ndrvl, np, nd_next); - /* Check if this is the last socket attached to this interface */ TAILQ_FOREACH(cur_np, &ndrvl, nd_next) { if (cur_np->nd_family == np->nd_family && @@ -549,13 +557,16 @@ ndrv_do_detach(struct ndrv_cb *np) /* If there are no other interfaces, detach PF_NDRV from the interface */ if (cur_np == NULL) { - dlil_detach_protocol(np->nd_if, PF_NDRV); + socket_unlock(so, 0); + dlil_detach_protocol(ifp, PF_NDRV); + socket_lock(so, 0); } - } else { - /* Remove from the linked list of control blocks */ - TAILQ_REMOVE(&ndrvl, np, nd_next); } + if (np->nd_laddr != NULL) { + FREE((caddr_t)np->nd_laddr, M_IFADDR); + np->nd_laddr = NULL; + } FREE((caddr_t)np, M_PCB); so->so_pcb = 0; so->so_flags |= SOF_PCBCLEARING; @@ -566,6 +577,7 @@ ndrv_do_detach(struct ndrv_cb *np) static int 
ndrv_do_disconnect(struct ndrv_cb *np) { + struct socket * so = np->nd_socket; #if NDRV_DEBUG kprintf("NDRV disconnect: %x\n", np); #endif @@ -574,9 +586,9 @@ ndrv_do_disconnect(struct ndrv_cb *np) FREE(np->nd_faddr, M_IFADDR); np->nd_faddr = 0; } - if (np->nd_socket->so_state & SS_NOFDREF) + if (so->so_state & SS_NOFDREF) ndrv_do_detach(np); - soisdisconnected(np->nd_socket); + soisdisconnected(so); return(0); } @@ -645,6 +657,7 @@ ndrv_setspec(struct ndrv_cb *np, struct sockopt *sopt) struct dlil_demux_desc* dlilDemux = NULL; struct ndrv_demux_desc* ndrvDemux = NULL; int error = 0; + struct socket *so = np->nd_socket; /* Sanity checking */ if (np->nd_proto_family != PF_NDRV) @@ -714,7 +727,9 @@ ndrv_setspec(struct ndrv_cb *np, struct sockopt *sopt) if (error == 0) { /* We've got all our ducks lined up...lets attach! */ + socket_unlock(so, 0); error = dlil_attach_protocol(&dlilSpec); + socket_lock(so, 0); if (error == 0) np->nd_proto_family = dlilSpec.protocol_family; } diff --git a/bsd/net/raw_usrreq.c b/bsd/net/raw_usrreq.c index d9bf97217..20d017a26 100644 --- a/bsd/net/raw_usrreq.c +++ b/bsd/net/raw_usrreq.c @@ -79,14 +79,10 @@ raw_init() { raw_mtx_grp_attr = lck_grp_attr_alloc_init(); - lck_grp_attr_setdefault(raw_mtx_grp_attr); - raw_mtx_grp = lck_grp_alloc_init("rawcb", raw_mtx_grp_attr); raw_mtx_attr = lck_attr_alloc_init(); - lck_attr_setdefault(raw_mtx_attr); - if ((raw_mtx = lck_mtx_alloc_init(raw_mtx_grp, raw_mtx_attr)) == NULL) { printf("raw_init: can't alloc raw_mtx\n"); return; diff --git a/bsd/net/route.c b/bsd/net/route.c index 4ab8d1d16..0e21b7266 100644 --- a/bsd/net/route.c +++ b/bsd/net/route.c @@ -111,14 +111,10 @@ route_init() { rt_mtx_grp_attr = lck_grp_attr_alloc_init(); - lck_grp_attr_setdefault(rt_mtx_grp_attr); - rt_mtx_grp = lck_grp_alloc_init("route", rt_mtx_grp_attr); rt_mtx_attr = lck_attr_alloc_init(); - lck_attr_setdefault(rt_mtx_attr); - if ((rt_mtx = lck_mtx_alloc_init(rt_mtx_grp, rt_mtx_attr)) == NULL) { printf("route_init: 
can't alloc rt_mtx\n"); return; diff --git a/bsd/net/zlib.c b/bsd/net/zlib.c index a3d4c72ba..7be8133ad 100644 --- a/bsd/net/zlib.c +++ b/bsd/net/zlib.c @@ -49,7 +49,7 @@ subject to change. Applications should only use zlib.h. */ -/* @(#) $Id: zlib.c,v 1.10 2004/07/29 19:17:20 lindak Exp $ */ +/* @(#) $Id: zlib.c,v 1.10.874.1 2005/06/24 01:47:11 lindak Exp $ */ #ifndef _Z_UTIL_H #define _Z_UTIL_H @@ -295,7 +295,7 @@ void zcfree OF((voidpf opaque, voidpf ptr)); subject to change. Applications should only use zlib.h. */ -/* @(#) $Id: zlib.c,v 1.10 2004/07/29 19:17:20 lindak Exp $ */ +/* @(#) $Id: zlib.c,v 1.10.874.1 2005/06/24 01:47:11 lindak Exp $ */ #ifndef _DEFLATE_H #define _DEFLATE_H @@ -655,7 +655,7 @@ void _tr_stored_block OF((deflate_state *s, charf *buf, ulg stored_len, * */ -/* @(#) $Id: zlib.c,v 1.10 2004/07/29 19:17:20 lindak Exp $ */ +/* @(#) $Id: zlib.c,v 1.10.874.1 2005/06/24 01:47:11 lindak Exp $ */ /* #include "deflate.h" */ @@ -1997,7 +1997,7 @@ local block_state deflate_slow(s, flush) * Addison-Wesley, 1983. ISBN 0-201-06672-6. 
*/ -/* @(#) $Id: zlib.c,v 1.10 2004/07/29 19:17:20 lindak Exp $ */ +/* @(#) $Id: zlib.c,v 1.10.874.1 2005/06/24 01:47:11 lindak Exp $ */ /* #define GEN_TREES_H */ @@ -5542,7 +5542,7 @@ z_streamp z; * For conditions of distribution and use, see copyright notice in zlib.h */ -/* @(#) $Id: zlib.c,v 1.10 2004/07/29 19:17:20 lindak Exp $ */ +/* @(#) $Id: zlib.c,v 1.10.874.1 2005/06/24 01:47:11 lindak Exp $ */ /* #include "zutil.h" */ @@ -5772,7 +5772,7 @@ void zcfree (opaque, ptr) * For conditions of distribution and use, see copyright notice in zlib.h */ -/* @(#) $Id: zlib.c,v 1.10 2004/07/29 19:17:20 lindak Exp $ */ +/* @(#) $Id: zlib.c,v 1.10.874.1 2005/06/24 01:47:11 lindak Exp $ */ /* #include "zlib.h" */ diff --git a/bsd/netat/adsp.c b/bsd/netat/adsp.c index b6c1cf6e6..1b88d6750 100644 --- a/bsd/netat/adsp.c +++ b/bsd/netat/adsp.c @@ -50,7 +50,6 @@ struct adsp_debug adsp_dtable[1025]; int ad_entry = 0; #endif -extern atlock_t adspgen_lock; adspAllocateCCB(gref) register gref_t *gref; /* READ queue */ @@ -69,9 +68,6 @@ adspAllocateCCB(gref) sp->pid = gref->pid; /* save the caller process pointer */ sp->gref = gref; /* save a back pointer to the WRITE queue */ sp->sp_mp = ccb_mp; /* and its message block */ - ATLOCKINIT(sp->lock); - ATLOCKINIT(sp->lockClose); - ATLOCKINIT(sp->lockRemove); return 1; } @@ -79,19 +75,14 @@ adspRelease(gref) register gref_t *gref; /* READ queue */ { register CCBPtr sp; - int s, l; - ATDISABLE(l, adspgen_lock); if (gref->info) { sp = (CCBPtr)gbuf_rptr(((gbuf_t *)gref->info)); - ATDISABLE(s, sp->lock); - ATENABLE(s, adspgen_lock); /* Tells completion routine of close */ /* packet to remove us. */ if (sp->state == sPassive || sp->state == sClosed || sp->state == sOpening || sp->state == sListening) { - ATENABLE(l, sp->lock); if (sp->state == sListening) CompleteQueue(&sp->opb, errAborted); sp->removing = 1; /* Prevent allowing another dspClose. 
*/ @@ -100,7 +91,6 @@ adspRelease(gref) } else { /* sClosing & sOpen */ sp->state = sClosing; } - ATENABLE(l, sp->lock); if (CheckOkToClose(sp)) { /* going to close */ sp->sendCtl = B_CTL_CLOSE; /* Send close advice */ @@ -110,13 +100,10 @@ adspRelease(gref) sp->sendCtl = B_CTL_CLOSE; /* Setup to send close advice */ } CheckSend(sp); /* and force out the close */ - ATDISABLE(s, sp->lock); sp->removing = 1; /* Prevent allowing another dspClose. */ sp->state = sClosed; - ATENABLE(s, sp->lock); DoClose(sp, errAborted, 0); /* to closed and remove CCB */ - } else - ATENABLE(l, adspgen_lock); + } } @@ -359,11 +346,11 @@ adsp_sendddp(sp, mp, length, dstnetaddr, ddptype) /* Set up the DDP header */ ddp = (DDPX_FRAME *) gbuf_rptr(mp); - UAS_ASSIGN(ddp->ddpx_length, (length + DDPL_FRAME_LEN)); + UAS_ASSIGN_HTON(ddp->ddpx_length, (length + DDPL_FRAME_LEN)); UAS_ASSIGN(ddp->ddpx_cksm, 0); if (sp) { if (sp->useCheckSum) - UAS_ASSIGN(ddp->ddpx_cksm, 1); + UAS_ASSIGN_HTON(ddp->ddpx_cksm, 1); } NET_ASSIGN(ddp->ddpx_dnet, dstnetaddr->a.net); diff --git a/bsd/netat/adsp_CLDeny.c b/bsd/netat/adsp_CLDeny.c index a7e14c4b0..62b14b93b 100644 --- a/bsd/netat/adsp_CLDeny.c +++ b/bsd/netat/adsp_CLDeny.c @@ -82,8 +82,8 @@ int adspCLDeny(struct adspcmd *pb, CCBPtr sp) adspp->descriptor = ADSP_CONTROL_BIT | ADSP_CTL_ODENY; adspop = (ADSP_OPEN_DATAPtr)gbuf_wptr(mp); gbuf_winc(mp,ADSP_OPEN_FRAME_LEN); - UAS_ASSIGN(adspop->dstCID, pb->u.openParams.remoteCID); - UAS_ASSIGN(adspop->version, 0x100); + UAS_ASSIGN_HTON(adspop->dstCID, pb->u.openParams.remoteCID); + UAS_ASSIGN_HTON(adspop->version, 0x100); adsp_sendddp(sp, mp, DDPL_FRAME_LEN + ADSP_FRAME_LEN + ADSP_OPEN_FRAME_LEN, &pb->u.openParams.remoteAddress, DDP_ADSP); diff --git a/bsd/netat/adsp_CLListen.c b/bsd/netat/adsp_CLListen.c index f4f33f25d..be7cf9f3d 100644 --- a/bsd/netat/adsp_CLListen.c +++ b/bsd/netat/adsp_CLListen.c @@ -69,7 +69,6 @@ int adspCLListen(sp, pb) /* (DSPPBPtr pb) */ { register struct adspcmd *clpb; gbuf_t *mp; - int s; 
if (sp == 0) { pb->ioResult = errRefNum; @@ -87,12 +86,8 @@ int adspCLListen(sp, pb) /* (DSPPBPtr pb) */ clpb = (struct adspcmd *)gbuf_rptr(mp); clpb->ioc = 0; clpb->mp = mp; - ATDISABLE(s, sp->lock); - if (qAddToEnd(&sp->opb, clpb)){ /* Add to list of listeners */ - ATENABLE(s, sp->lock); - return EFAULT; /* bogus, but discriminate from other errors */ - } - ATENABLE(s, sp->lock); + if (qAddToEnd(&sp->opb, clpb)) /* Add to list of listeners */ + return EFAULT; /* bogus, but discriminate from other errors */ } else { pb->ioResult = errDSPQueueSize; return ENOBUFS; diff --git a/bsd/netat/adsp_Close.c b/bsd/netat/adsp_Close.c index 179d60e96..381e22200 100644 --- a/bsd/netat/adsp_Close.c +++ b/bsd/netat/adsp_Close.c @@ -56,7 +56,6 @@ #include #include -extern atlock_t adspall_lock; static void qRemove(CCBPtr, CCBPtr); @@ -122,7 +121,6 @@ int CompleteQueue(qhead, code) /* (DSPPBPtr FPTR qhead, OSErr code) */ register gref_t *gref; register int total = 0; CCBPtr sp = 0; - int s; n = *qhead; /* Get first item */ *qhead = 0; /* Zero out the queue */ @@ -131,7 +129,6 @@ int CompleteQueue(qhead, code) /* (DSPPBPtr FPTR qhead, OSErr code) */ if (gref->info) { sp = (CCBPtr)gbuf_rptr(((gbuf_t *)gref->info)); atalk_flush(sp->gref); - ATDISABLE(s, sp->lock); } } @@ -144,8 +141,6 @@ int CompleteQueue(qhead, code) /* (DSPPBPtr FPTR qhead, OSErr code) */ } else gbuf_freem(p->mp); } /* while */ - if (sp) - ATENABLE(s, sp->lock); return(total); } @@ -352,7 +347,6 @@ int adspClose(sp, pb) /* (DSPPBPtr pb) */ register CCBPtr sp; register struct adspcmd *pb; { - int s; register gbuf_t *mp; /* Must execute nearly all of this with ints off because user could @@ -404,10 +398,8 @@ int adspClose(sp, pb) /* (DSPPBPtr pb) */ * is still pending. 
*/ if (pb->csCode == (short)dspClose) { - ATDISABLE(s, sp->lock); if ((sp->state == (short)sPassive) || (sp->state == (short)sOpening)) { sp->state = sClosed; - ATENABLE(s, sp->lock); DoClose(sp, errAborted, 0); pb->ioResult = 0; adspioc_ack(0, pb->ioc, pb->gref); @@ -415,23 +407,19 @@ int adspClose(sp, pb) /* (DSPPBPtr pb) */ } if (sp->state == (word)sClosed) { /* Ok to close a closed connection */ - ATENABLE(s, sp->lock); pb->ioResult = 0; adspioc_ack(0, pb->ioc, pb->gref); return 0; } if ((sp->state != (word)sOpen) && (sp->state != (word)sClosing)) { - ATENABLE(s, sp->lock); pb->ioResult = errState; return EINVAL; } sp->state = sClosing; /* No matter what, we're closing */ - ATENABLE(s, sp->lock); } /* dspClose */ else { /* dspRemove */ - ATDISABLE(s, sp->lock); sp->removing = 1; /* Prevent allowing another dspClose. */ /* Tells completion routine of close */ /* packet to remove us. */ @@ -439,13 +427,10 @@ int adspClose(sp, pb) /* (DSPPBPtr pb) */ if (sp->state == sPassive || sp->state == sClosed || sp->state == sOpening) { sp->state = sClosed; - ATENABLE(s, sp->lock); DoClose(sp, errAborted, 0); /* Will remove CCB! 
*/ return 0; - } else { /* sClosing & sOpen */ + } else /* sClosing & sOpen */ sp->state = sClosing; - ATENABLE(s, sp->lock); - } } /* dspRemove */ @@ -461,9 +446,7 @@ int adspClose(sp, pb) /* (DSPPBPtr pb) */ pb = (struct adspcmd *)gbuf_rptr(mp); /* get new parameter block */ pb->ioc = 0; pb->mp = mp; - ATDISABLE(s, sp->lock); qAddToEnd(&sp->opb, pb); /* and save it */ - ATENABLE(s, sp->lock); } else { pb->ioResult = 0; adspioc_ack(0, pb->ioc, pb->gref); /* release user, and keep no copy @@ -479,19 +462,15 @@ static void qRemove(qptr, elem) register CCBPtr qptr; register CCBPtr elem; { - int s; - ATDISABLE(s, adspall_lock); while(qptr->ccbLink) { if ((DSPPBPtr)(qptr->ccbLink) == (DSPPBPtr)elem) { qptr->ccbLink = elem->ccbLink; elem->ccbLink = 0; - ATENABLE(s, adspall_lock); return; } qptr = qptr->ccbLink; } - ATENABLE(s, adspall_lock); } int RxClose(sp) @@ -499,17 +478,11 @@ int RxClose(sp) { register gbuf_t *mp; register struct adspcmd *pb; - int s, l; - ATDISABLE(l, sp->lockClose); - ATDISABLE(s, sp->lock); - if ((sp->state == sClosing) || (sp->state == sClosed)) { - ATENABLE(s, sp->lock); - ATENABLE(l, sp->lockClose); + if ((sp->state == sClosing) || (sp->state == sClosed)) return 0; - } + sp->state = sClosed; - ATENABLE(s, sp->lock); CheckReadQueue(sp); /* try to deliver all remaining data */ if ( (mp = gbuf_alloc(sizeof(struct adspcmd), PRI_HI)) ) { @@ -526,6 +499,5 @@ int RxClose(sp) if ((sp->userFlags & eClosed) == 0) DoClose(sp, errAborted, -1); /* abort send requests and timers */ - ATENABLE(l, sp->lockClose); return 0; } diff --git a/bsd/netat/adsp_Control.c b/bsd/netat/adsp_Control.c index fe9d6803a..44e521158 100644 --- a/bsd/netat/adsp_Control.c +++ b/bsd/netat/adsp_Control.c @@ -129,7 +129,6 @@ void CheckSend(sp) /* (CCBPtr sp) */ { int i; int attnMsg; /* True if attention message */ - int s; register gbuf_t *mp; /* send message block */ #ifdef notdef register gbuf_t *tmp; @@ -155,7 +154,6 @@ void CheckSend(sp) /* (CCBPtr sp) */ gbuf_freel(mlist); 
return; /* can't get buffers... do nothing! */ } - ATDISABLE(s, sp->lock); sp->callSend = 0; /* Clear flag */ use_attention_code = 0; len = 0; @@ -176,24 +174,24 @@ void CheckSend(sp) /* (CCBPtr sp) */ /* point past ADSP header (no attention) */ dp = ((char *) gbuf_wptr(mp)) + ADSP_FRAME_LEN; - UAL_ASSIGN(sp->f.pktFirstByteSeq, netdw(sp->firstRtmtSeq)); + UAL_ASSIGN_HTON(sp->f.pktFirstByteSeq, sp->firstRtmtSeq); - UAS_ASSIGN(sp->of.version, netw(0x0100)); /* Fill in open connection parms */ - UAS_ASSIGN(sp->of.dstCID, sp->remCID); /* Destination CID */ - UAL_ASSIGN(sp->of.pktAttnRecvSeq, netdw(sp->attnRecvSeq)); + UAS_ASSIGN_HTON(sp->of.version, netw(0x0100)); /* Fill in open connection parms */ + UAS_ASSIGN_HTON(sp->of.dstCID, sp->remCID); /* Destination CID */ + UAL_ASSIGN_HTON(sp->of.pktAttnRecvSeq, sp->attnRecvSeq); bcopy((caddr_t) &sp->of, (caddr_t) dp, ADSP_OPEN_FRAME_LEN); len += ADSP_OPEN_FRAME_LEN; if (i & B_CTL_OREQ) { - UAS_ASSIGN(sp->f.CID, sp->locCID); + UAS_ASSIGN_HTON(sp->f.CID, sp->locCID); mask = B_CTL_OREQ; sp->f.descriptor = ADSP_CONTROL_BIT | ADSP_CTL_OREQ; } else if (i & B_CTL_OACK) { - UAS_ASSIGN(sp->f.CID, sp->locCID); + UAS_ASSIGN_HTON(sp->f.CID, sp->locCID); mask = B_CTL_OACK; sp->f.descriptor = ADSP_CONTROL_BIT | ADSP_CTL_OACK; } else if (i & B_CTL_OREQACK) { - UAS_ASSIGN(sp->f.CID, sp->locCID); + UAS_ASSIGN_HTON(sp->f.CID, sp->locCID); mask = B_CTL_OREQACK; sp->f.descriptor = ADSP_CONTROL_BIT | ADSP_CTL_OREQACK; } else /* Deny */ @@ -219,7 +217,7 @@ void CheckSend(sp) /* (CCBPtr sp) */ } } else { /* seq # of next byte to send */ - UAL_ASSIGN(sp->f.pktFirstByteSeq, netdw(sp->sendSeq)); + UAL_ASSIGN_HTON(sp->f.pktFirstByteSeq, sp->sendSeq); if (i & B_CTL_CLOSE) { sp->state = sClosed; /* Now we're closed */ @@ -298,7 +296,7 @@ void CheckSend(sp) /* (CCBPtr sp) */ } if (sp->sendDataAck) { - UAL_ASSIGN(sp->f.pktFirstByteSeq, netdw(sp->sendSeq)); /* seq # of next byte */ + UAL_ASSIGN_HTON(sp->f.pktFirstByteSeq, sp->sendSeq); /* seq # of next 
byte */ attnMsg = 0; sp->f.descriptor = ADSP_CONTROL_BIT; goto sendit; @@ -309,7 +307,6 @@ void CheckSend(sp) /* (CCBPtr sp) */ */ if (mp) gbuf_freem(mp); - ATENABLE(s, sp->lock); if (mlist) adsp_sendddp(sp, mlist, 0, &sp->remoteAddress, DDP_ADSP); return; @@ -317,13 +314,13 @@ void CheckSend(sp) /* (CCBPtr sp) */ sendit: if (attnMsg) { - UAL_ASSIGN(sp->f.pktFirstByteSeq, netdw(sp->attnSendSeq)); - UAL_ASSIGN(sp->f.pktNextRecvSeq, netdw(sp->attnRecvSeq)); + UAL_ASSIGN_HTON(sp->f.pktFirstByteSeq, sp->attnSendSeq); + UAL_ASSIGN_HTON(sp->f.pktNextRecvSeq, sp->attnRecvSeq); UAS_ASSIGN(sp->f.pktRecvWdw, 0); /* Always zero in attn pkt */ } else { sp->sendDataAck = 0; - UAL_ASSIGN(sp->f.pktNextRecvSeq, netdw(sp->recvSeq)); - UAS_ASSIGN(sp->f.pktRecvWdw, netw(CalcRecvWdw(sp))); + UAL_ASSIGN_HTON(sp->f.pktNextRecvSeq, sp->recvSeq); + UAS_ASSIGN_HTON(sp->f.pktRecvWdw, CalcRecvWdw(sp)); } if (use_attention_code) { bcopy((caddr_t) &sp->f, (caddr_t) gbuf_wptr(mp), ADSP_FRAME_LEN + 2); @@ -341,12 +338,10 @@ void CheckSend(sp) /* (CCBPtr sp) */ if (sp->state == sClosed) { /* must have sent a close advice */ /* send header + data */ - ATENABLE(s, sp->lock); adsp_sendddp(sp, mlist, 0, &sp->remoteAddress, DDP_ADSP); DoClose(sp, 0, -1); /* complete close! */ return; } - ATENABLE(s, sp->lock); if (sp->state == sClosing) /* See if we were waiting on this write */ CheckOkToClose(sp); goto top; @@ -401,7 +396,7 @@ attachData(sp, mp) * The easiest fix to this timing dilemma seems to be to reset * sendSeq to first Rtmt Seq if we're sending the first packet. 
*/ - UAL_ASSIGN(sp->f.pktFirstByteSeq, netdw(sp->sendSeq)); + UAL_ASSIGN_HTON(sp->f.pktFirstByteSeq, sp->sendSeq); if (smp = sp->sbuf_mb) /* Get oldest header */ eom = 1; diff --git a/bsd/netat/adsp_Init.c b/bsd/netat/adsp_Init.c index b6ab21997..8400cd061 100644 --- a/bsd/netat/adsp_Init.c +++ b/bsd/netat/adsp_Init.c @@ -50,7 +50,6 @@ #include #include -extern atlock_t adspgen_lock; /* * InitContinue @@ -70,7 +69,6 @@ static void InitContinue(sp, pb) /* (CCBPtr sp, DSPPBPtr pb, int soc) */ CCBPtr sp; struct adspcmd *pb; { - int s; /* Save connection's socket # in CCB */ sp->localSocket = pb->socket; @@ -78,9 +76,7 @@ static void InitContinue(sp, pb) /* (CCBPtr sp, DSPPBPtr pb, int soc) */ /* * Link the new ccb onto queue. Must be done with interrupts off. */ - ATDISABLE(s, adspgen_lock); qAddToEnd(AT_ADSP_STREAMS, sp); /* Put on linked list of connections */ - ATENABLE(s, adspgen_lock); return; } diff --git a/bsd/netat/adsp_Open.c b/bsd/netat/adsp_Open.c index d73e2ee94..f68b624e3 100644 --- a/bsd/netat/adsp_Open.c +++ b/bsd/netat/adsp_Open.c @@ -47,7 +47,6 @@ #include #include -extern atlock_t adspgen_lock; /* * NextCID @@ -61,12 +60,10 @@ extern atlock_t adspgen_lock; */ unsigned short NextCID() { - int s; unsigned short num; register CCB *queue; while (1) { - ATDISABLE(s, adspgen_lock); /* Disable interrupts */ num = ++adspGlobal.lastCID; /* qfind_w below is in 68K assembly */ /* point to the first element */ @@ -77,7 +74,6 @@ unsigned short NextCID() break; queue = queue->ccbLink; } - ATENABLE(s, adspgen_lock); if (queue == (CCBPtr)NULL) break; } @@ -241,7 +237,7 @@ int adspOpen(sp, pb) /* (DSPPBPtr pb) */ if (ocMode == ocEstablish) { /* Only set these if establish mode */ sp->recvSeq = pb->u.openParams.recvSeq; sp->attnRecvSeq = pb->u.openParams.attnRecvSeq; - UAS_ASSIGN(sp->f.CID, sp->locCID); /* Preset the CID in the ADSP header */ + UAS_ASSIGN_HTON(sp->f.CID, sp->locCID); /* Preset the CID in the ADSP header */ /* This is done elsewhere for all other modes 
*/ InsertTimerElem(&adspGlobal.slowTimers, &sp->ProbeTimer, sp->probeInterval); diff --git a/bsd/netat/adsp_Packet.c b/bsd/netat/adsp_Packet.c index 2b9a8f0ff..3293496f0 100644 --- a/bsd/netat/adsp_Packet.c +++ b/bsd/netat/adsp_Packet.c @@ -541,7 +541,6 @@ static int RXConnection(gref, spPtr, f, len, addr, dsoc) gbuf_t *mp; ADSP_FRAMEPtr adspp; ADSP_OPEN_DATAPtr adspop; - int s; op = (ADSP_OPEN_DATAPtr)&f->data[0]; /* Point to Open-Connection parms */ len -= ADSP_FRAME_LEN; @@ -567,7 +566,7 @@ static int RXConnection(gref, spPtr, f, len, addr, dsoc) adspop = (ADSP_OPEN_DATAPtr)gbuf_wptr(mp); gbuf_winc(mp,ADSP_OPEN_FRAME_LEN); UAS_UAS(adspop->dstCID, f->CID); - UAS_ASSIGN(adspop->version, 0x100); + UAS_ASSIGN_HTON(adspop->version, 0x100); adsp_sendddp(0, mp, DDPL_FRAME_LEN + ADSP_FRAME_LEN + ADSP_OPEN_FRAME_LEN, &addr, DDP_ADSP); @@ -576,8 +575,8 @@ static int RXConnection(gref, spPtr, f, len, addr, dsoc) m.addr = addr; m.socket = dsoc; m.descriptor = f->descriptor; - m.srcCID = UAS_VALUE(f->CID); - m.dstCID = UAS_VALUE(op->dstCID); /* On even-byte boundry */ + m.srcCID = UAS_VALUE_NTOH(f->CID); + m.dstCID = UAS_VALUE_NTOH(op->dstCID); /* On even-byte boundry */ m.idx = ((f->descriptor & ADSP_CONTROL_MASK) - 1) * 4; /* @@ -598,7 +597,6 @@ static int RXConnection(gref, spPtr, f, len, addr, dsoc) (ProcPtr)MatchListener)) == 0) return 1; - ATDISABLE(s, sp->lock); p = (struct adspcmd *)&sp->opb; while (n = (struct adspcmd *)p->qLink) /* Hunt down list of listens */ { @@ -615,11 +613,10 @@ static int RXConnection(gref, spPtr, f, len, addr, dsoc) p->qLink = n->qLink; /* Unlink this param block */ n->u.openParams.remoteCID = m.srcCID; *((AddrUnionPtr)&n->u.openParams.remoteAddress) = addr; - n->u.openParams.sendSeq = netdw(UAL_VALUE(f->pktNextRecvSeq)); - n->u.openParams.sendWindow = netw(UAS_VALUE(f->pktRecvWdw)); - n->u.openParams.attnSendSeq = netdw(UAL_VALUE(op->pktAttnRecvSeq)); + n->u.openParams.sendSeq = UAL_VALUE_NTOH(f->pktNextRecvSeq); + 
n->u.openParams.sendWindow = UAS_VALUE_NTOH(f->pktRecvWdw); + n->u.openParams.attnSendSeq = UAL_VALUE_NTOH(op->pktAttnRecvSeq); n->ioResult = 0; - ATENABLE(s, sp->lock); completepb(sp, n); /* complete copy of request */ /* complete(n, 0); */ return 0; @@ -629,29 +626,26 @@ static int RXConnection(gref, spPtr, f, len, addr, dsoc) } /* while */ - ATENABLE(s, sp->lock); return 1; } *spPtr = sp; /* Save ptr to stream we just found */ - ATDISABLE(s, sp->lock); sp->openState = m.t->openState; /* Move to next state (may be same) */ sp->state = m.t->state; /* Move to next state (may be same) */ if (m.t->action & A_SAVEPARMS) { /* Need to Save open-conn parms */ - sp->firstRtmtSeq = sp->sendSeq = netdw(UAL_VALUE(f->pktNextRecvSeq)); - sp->sendWdwSeq = netdw(UAL_VALUE(f->pktNextRecvSeq)) + netw(UAS_VALUE(f->pktRecvWdw)) - 1; - sp->attnSendSeq = netdw(UAL_VALUE(op->pktAttnRecvSeq)); /* on even boundry */ + sp->firstRtmtSeq = sp->sendSeq = UAL_VALUE_NTOH(f->pktNextRecvSeq); + sp->sendWdwSeq = UAL_VALUE_NTOH(f->pktNextRecvSeq) + UAS_VALUE_NTOH(f->pktRecvWdw) - 1; + sp->attnSendSeq = UAL_VALUE_NTOH(op->pktAttnRecvSeq); /* on even boundry */ - sp->remCID = UAS_VALUE(f->CID); /* Save Source CID as RemCID */ + sp->remCID = UAS_VALUE_NTOH(f->CID); /* Save Source CID as RemCID */ UAS_UAS(sp->of.dstCID, f->CID); /* Save CID in open ctl packet */ sp->remoteAddress = addr; /* Save his address */ } - ATENABLE(s, sp->lock); if (m.t->action & A_DENY) { /* We've been denied ! 
*/ DoClose(sp, errOpenDenied, -1); @@ -725,7 +719,6 @@ int adspPacket(gref, mp) int len; AddrUnion a; int dsoc; - int s; register DDPX_FRAME *ddp; /* DDP frame pointer */ register ADSP_FRAMEPtr f; /* Frame */ CCBPtr sp; @@ -737,7 +730,7 @@ int adspPacket(gref, mp) return -1; f = (ADSP_FRAMEPtr)(bp + DDPL_FRAME_LEN); - len = UAS_VALUE(ddp->ddpx_length) & 0x3ff; /* (ten bits of length) */ + len = UAS_VALUE_NTOH(ddp->ddpx_length) & 0x3ff; /* (ten bits of length) */ len -= DDPL_FRAME_LEN; if (len < (sizeof(ADSP_FRAME) - 1)) /* Packet too small */ return -1; /* mark the failure */ @@ -799,12 +792,10 @@ int adspPacket(gref, mp) /* This pkt may also ack some data we sent */ CheckRecvSeq(sp, f); RemoveTimerElem(&adspGlobal.fastTimers, &sp->RetryTimer); - ATDISABLE(s, sp->lock); sp->sendSeq = sp->firstRtmtSeq; sp->pktSendCnt = 0; sp->waitingAck = 0; sp->callSend = 1; - ATENABLE(s, sp->lock); } else goto ignore; break; diff --git a/bsd/netat/adsp_Read.c b/bsd/netat/adsp_Read.c index fba5e4191..6c6c2390c 100644 --- a/bsd/netat/adsp_Read.c +++ b/bsd/netat/adsp_Read.c @@ -62,7 +62,6 @@ int CheckReadQueue(sp) /* (CCBPtr sp) */ register CCBPtr sp; { register struct adspcmd *pb; - int s; unsigned short cnt; char eom = 0; register gbuf_t *mp; @@ -72,7 +71,6 @@ int CheckReadQueue(sp) /* (CCBPtr sp) */ dPrintf(D_M_ADSP, D_L_TRACE, ("CheckReadQueue: sp=0x%x\n", (unsigned)sp)); KERNEL_DEBUG(DBG_ADSP_READ, 0, sp, sp->rbuf_mb, sp->rpb, sp->delay); trace_mbufs(D_M_ADSP_LOW, " bCQR m", sp->rbuf_mb); - ATDISABLE(s, sp->lock); while (sp->rData && (pb = sp->rpb)) { /* have data */ dPrintf(D_M_ADSP, D_L_TRACE, @@ -233,7 +231,6 @@ int CheckReadQueue(sp) /* (CCBPtr sp) */ sp->sendDataAck = 1; sp->callSend = 1; } - ATENABLE(s, sp->lock); KERNEL_DEBUG(DBG_ADSP_READ, 0x11, sp, 0, 0, 0); trace_mbufs(D_M_ADSP_LOW, " eCQR m", sp->rbuf_mb); @@ -257,14 +254,12 @@ int CheckAttn(sp, pb) /* (CCBPtr sp) */ register CCBPtr sp; register struct adspcmd *pb; { - int s; gbuf_t *mp; gref_t *gref; 
dPrintf(D_M_ADSP, D_L_TRACE, ("CheckAttn: sp=0x%x, pb=0x%x\n", (unsigned)sp, (unsigned)pb)); - ATDISABLE(s, sp->lock); if (mp = sp->attn_mb) { /* @@ -296,7 +291,6 @@ int CheckAttn(sp, pb) /* (CCBPtr sp) */ if (mp) { SndMsgUp(gref, mp); } - ATENABLE(s, sp->lock); return 0; } @@ -322,7 +316,6 @@ int adspRead(sp, pb) /* (DSPPBPtr pb) */ register struct adspcmd *pb; { register gbuf_t *mp; - int s; dPrintf(D_M_ADSP, D_L_TRACE, ("adspRead: sp=0x%x, pb=0x%x\n", (unsigned)sp, (unsigned)pb)); @@ -337,19 +330,15 @@ int adspRead(sp, pb) /* (DSPPBPtr pb) */ /* * It's OK to read on a closed, or closing session */ - ATDISABLE(s, sp->lock); if (sp->state != sOpen && sp->state != sClosing && sp->state != sClosed) { - ATENABLE(s, sp->lock); pb->ioResult = errState; return EINVAL; } if (sp->rData && (sp->rpb == 0)) { /* if data, and no queue of pbs */ qAddToEnd(&sp->rpb, pb); /* deliver data to user directly */ - ATENABLE(s, sp->lock); CheckReadQueue(sp); } else if ((pb->u.ioParams.reqCount == 0) && (sp->rpb == 0)) { /* empty read */ - ATENABLE(s, sp->lock); pb->ioResult = 0; adspioc_ack(0, pb->ioc, pb->gref); return 0; @@ -361,9 +350,7 @@ int adspRead(sp, pb) /* (DSPPBPtr pb) */ pb->ioc = 0; pb->mp = mp; qAddToEnd(&sp->rpb, pb); /* and queue it for later */ - ATENABLE(s, sp->lock); } else { - ATENABLE(s, sp->lock); pb->ioResult = errDSPQueueSize; return ENOBUFS; } diff --git a/bsd/netat/adsp_RxAttn.c b/bsd/netat/adsp_RxAttn.c index 8f62c8c1a..c4792ddb6 100644 --- a/bsd/netat/adsp_RxAttn.c +++ b/bsd/netat/adsp_RxAttn.c @@ -104,7 +104,7 @@ CCBPtr FindSender(f, a) /* (ADSP_FRAMEPtr f, AddrUnion a) */ MATCH_SENDER m; m.addr = a; - m.srcCID = UAS_VALUE(f->CID); + m.srcCID = UAS_VALUE_NTOH(f->CID); return (CCBPtr)qfind_m(AT_ADSP_STREAMS, &m, (ProcPtr)MatchSender); } @@ -141,7 +141,7 @@ int RXAttention(sp, mp, f, len) /* (CCBPtr sp, ADSP_FRAMEPtr f, word len) */ (char)(ADSP_ATTENTION_BIT | ADSP_ACK_REQ_BIT)) && /* Attention Data */ ((sp->userFlags & eAttention) == 0)) /* & he read the 
previous */ { - diff = netdw(UAL_VALUE(f->pktFirstByteSeq)) - sp->attnRecvSeq; + diff = UAL_VALUE_NTOH(f->pktFirstByteSeq) - sp->attnRecvSeq; if (diff > 0) /* Hey, he missed one */ return 1; @@ -177,7 +177,7 @@ int RXAttention(sp, mp, f, len) /* (CCBPtr sp, ADSP_FRAMEPtr f, word len) */ * Interrupts are OFF here, otherwise we have to do this atomically */ /* Check to see if this acknowledges anything */ - if ((sp->attnSendSeq + 1) == netdw(UAL_VALUE(f->pktNextRecvSeq))) { + if ((sp->attnSendSeq + 1) == UAL_VALUE_NTOH(f->pktNextRecvSeq)) { sp->attnSendSeq++; if ((pb = sp->sapb) == 0) { /* We never sent data ? !!! */ if (mp) diff --git a/bsd/netat/adsp_RxData.c b/bsd/netat/adsp_RxData.c index bb1174e09..02d732044 100644 --- a/bsd/netat/adsp_RxData.c +++ b/bsd/netat/adsp_RxData.c @@ -107,20 +107,18 @@ void CheckRecvSeq(sp, f) /* (CCBPtr sp, ADSP_FRAMEPtr f) */ register CCBPtr sp; register ADSP_FRAMEPtr f; { - int s; int pktNextRecvSeq; int sendWdwSeq; int eom; int hlen; register gbuf_t *mp; - ATDISABLE(s, sp->lock); if (f->descriptor & ADSP_ACK_REQ_BIT) { /* He wants an Ack */ sp->sendDataAck = 1; sp->callSend = 1; } - pktNextRecvSeq = netdw(UAL_VALUE(f->pktNextRecvSeq)); /* Local copy */ + pktNextRecvSeq = UAL_VALUE_NTOH(f->pktNextRecvSeq); /* Local copy */ /* * Make sure the sequence number corresponds to reality -- i.e. 
for @@ -235,7 +233,7 @@ void CheckRecvSeq(sp, f) /* (CCBPtr sp, ADSP_FRAMEPtr f) */ sp->callSend = 1; noack: - sendWdwSeq = netw(UAS_VALUE(f->pktRecvWdw)) - 1 + pktNextRecvSeq; + sendWdwSeq = UAS_VALUE_NTOH(f->pktRecvWdw) - 1 + pktNextRecvSeq; if (GT(sendWdwSeq, sp->sendWdwSeq)) /* Don't make send window smaller */ { @@ -243,7 +241,6 @@ void CheckRecvSeq(sp, f) /* (CCBPtr sp, ADSP_FRAMEPtr f) */ /* if we can send more data */ sp->sendWdwSeq = sendWdwSeq; } - ATENABLE(s, sp->lock); } /* @@ -269,7 +266,7 @@ int RXData(sp, mp, f, len) /* (CCBPtr sp, ADSP_FRAMEPtr f, word len) */ ADSP_FRAMEPtr f; int len; { - int s, offset; + int offset; int PktFirstByteSeq; short cnt; char eom; @@ -287,14 +284,12 @@ int RXData(sp, mp, f, len) /* (CCBPtr sp, ADSP_FRAMEPtr f, word len) */ trace_mbufs(D_M_ADSP, " mp", mp); - PktFirstByteSeq = netdw(UAL_VALUE(f->pktFirstByteSeq)); /* Local copy */ + PktFirstByteSeq = UAL_VALUE_NTOH(f->pktFirstByteSeq); /* Local copy */ - ATDISABLE(s, sp->lock); if (GT(PktFirstByteSeq, sp->recvSeq)) /* missed a packet (out of order) */ { if (sp->badSeqCnt++ > sp->badSeqCnt) /* Need to send rexmit advice */ sp->sendCtl |= B_CTL_RETRANSMIT; - ATENABLE(s, sp->lock); CheckRecvSeq(sp, f); /* Will set send ACK flag if requested */ CheckReadQueue(sp); gbuf_freem(mp); @@ -307,7 +302,6 @@ int RXData(sp, mp, f, len) /* (CCBPtr sp, ADSP_FRAMEPtr f, word len) */ } if (LTE(PktFirstByteSeq + len + eom, sp->recvSeq)) { /* duplicate data? 
*/ - ATENABLE(s, sp->lock); CheckRecvSeq(sp, f); /* Will set send ACK flag if requested */ CheckReadQueue(sp); gbuf_freem(mp); @@ -382,7 +376,6 @@ int RXData(sp, mp, f, len) /* (CCBPtr sp, ADSP_FRAMEPtr f, word len) */ * doing anything that might take a long while */ - ATENABLE(s, sp->lock); CheckRecvSeq(sp, f); /* Will set send ACK flag if requested */ CheckReadQueue(sp); KERNEL_DEBUG(DBG_ADSP_RCV, 5, sp, sp->rbuf_mb, 0, 0); diff --git a/bsd/netat/adsp_Status.c b/bsd/netat/adsp_Status.c index 46880508b..4ec89730a 100644 --- a/bsd/netat/adsp_Status.c +++ b/bsd/netat/adsp_Status.c @@ -108,7 +108,6 @@ int adspStatus(sp, pb) /* (DSPPBPtr pb) */ { short err; short bytes; - int s; if (sp == 0) { pb->ioResult = errRefNum; @@ -116,7 +115,6 @@ int adspStatus(sp, pb) /* (DSPPBPtr pb) */ } pb->u.statusParams.ccbPtr = (TPCCB)sp; - ATDISABLE(s, sp->lock); /* * pending bytes in send queue @@ -142,7 +140,6 @@ int adspStatus(sp, pb) /* (DSPPBPtr pb) */ /* available buffer space in receive queue */ pb->u.statusParams.recvQFree = CalcRecvWdw(sp); - ATENABLE(s, sp->lock); pb->ioResult = 0; adspioc_ack(0, pb->ioc, pb->gref); return 0; diff --git a/bsd/netat/adsp_Timer.c b/bsd/netat/adsp_Timer.c index d528e7e3a..db7f045ac 100644 --- a/bsd/netat/adsp_Timer.c +++ b/bsd/netat/adsp_Timer.c @@ -70,13 +70,10 @@ void TimerTick(); void TrashSession(sp) /* (CCBPtr sp) */ CCBPtr sp; { - int s; - ATDISABLE(s, sp->lock); sp->userFlags |= eTearDown; sp->removing = 1; sp->state = sClosed; - ATENABLE(s, sp->lock); DoClose(sp, errAborted, 1); } @@ -94,10 +91,8 @@ void DoTimerElem(t) /* (TimerElemPtr t) */ TimerElemPtr t; { CCBPtr sp; - int s; sp = (CCBPtr)((Ptr)t - t->type); /* Recover stream pointer for this guy */ - ATDISABLE(s, sp->lock); if (t->type == kFlushTimerType) { /* flush write data time just fired */ if (sp->sData) { /* If there's any data, flush it. 
*/ @@ -136,7 +131,6 @@ void DoTimerElem(t) /* (TimerElemPtr t) */ } else if (t->type == kProbeTimerType) { if (sp->state == sOpen || sp->state == sClosing) { if (--sp->probeCntr == 0) { /* Connection died */ - ATENABLE(s, sp->lock); TrashSession(sp); return; } else { @@ -151,7 +145,6 @@ void DoTimerElem(t) /* (TimerElemPtr t) */ { if (--sp->openRetrys == 0) { /* Oops, didn't open */ sp->state = sClosed; - ATENABLE(s, sp->lock); DoClose(sp, errOpening, 1); return; } /* open failed */ @@ -169,11 +162,9 @@ void DoTimerElem(t) /* (TimerElemPtr t) */ dPrintf(D_M_ADSP, D_L_ERROR, ("DoTimerElem:Unknown timer type!\n")); } - ATENABLE(s, sp->lock); return; send: - ATENABLE(s, sp->lock); CheckSend(sp); } diff --git a/bsd/netat/adsp_TimerElem.c b/bsd/netat/adsp_TimerElem.c index d7f61ac14..2ed056d3b 100644 --- a/bsd/netat/adsp_TimerElem.c +++ b/bsd/netat/adsp_TimerElem.c @@ -46,7 +46,6 @@ #include #include -atlock_t adsptmr_lock; extern void DoTimerElem(); /* (TimerElemPtr t); * External routine called to @@ -69,17 +68,13 @@ void InsertTimerElem(qhead, t, val) { TimerElemPtr p; /* parent pointer */ TimerElemPtr n; /* current */ - int s; - - ATDISABLE(s, adsptmr_lock); - + if (t->onQ) { /* - * someone else beat us to the punch and put this - * element back on the queue, just return in this case - */ - ATENABLE(s, adsptmr_lock); - return; + * someone else beat us to the punch and put this + * element back on the queue, just return in this case + */ + return; } p = (TimerElemPtr)qhead; @@ -101,7 +96,6 @@ void InsertTimerElem(qhead, t, val) t->timer = val; /* this is our value */ t->link = n; /* we point to n */ - ATENABLE(s, adsptmr_lock); } @@ -119,17 +113,13 @@ void RemoveTimerElem(qhead, t) /* (TimerElemPtr *qhead, TimerElemPtr t) */ { TimerElemPtr p; /* parent pointer */ TimerElemPtr n; /* current */ - int s; - - ATDISABLE(s, adsptmr_lock); - + if ( !t->onQ) { /* - * someone else beat us to the punch and took this - * element off of the queue, just return in this case - */ - 
ATENABLE(s, adsptmr_lock); - return; + * someone else beat us to the punch and took this + * element off of the queue, just return in this case + */ + return; } p = (TimerElemPtr)qhead; @@ -147,7 +137,6 @@ void RemoveTimerElem(qhead, t) /* (TimerElemPtr *qhead, TimerElemPtr t) */ p = n; } /* while */ - ATENABLE(s, adsptmr_lock); } @@ -165,29 +154,19 @@ void TimerQueueTick(qhead) /* (TimerElemPtr *qhead) */ { TimerElemPtr p; /* parent pointer */ TimerElemPtr n; /* current */ - int s; - - ATDISABLE(s, adsptmr_lock); - + p = (TimerElemPtr)qhead; - if (p->link) /* Is anything on queue? */ + if (p->link) { /* Is anything on queue? */ p->link->timer--; /* Yes, decrement by a tick */ - else - goto done; /* No, we're outta' here */ - - while ((n = p->link) && - (n->timer == 0)) /* Next guy needs to be serviced */ - { - p->link = n->link; /* Unlink us */ - n->onQ = 0; + while ((n = p->link) && + (n->timer == 0)) /* Next guy needs to be serviced */ + { + p->link = n->link; /* Unlink us */ + n->onQ = 0; - ATENABLE(s, adsptmr_lock); - DoTimerElem(n); - ATDISABLE(s, adsptmr_lock); + DoTimerElem(n); - p = (TimerElemPtr)qhead; - } /* while */ - -done: - ATENABLE(s, adsptmr_lock); + p = (TimerElemPtr)qhead; + } /* while */ + } } diff --git a/bsd/netat/adsp_Write.c b/bsd/netat/adsp_Write.c index 9a170cf50..4d2b1a632 100644 --- a/bsd/netat/adsp_Write.c +++ b/bsd/netat/adsp_Write.c @@ -67,7 +67,6 @@ int FillSendQueue(sp, pb) /* (CCBPtr sp) */ int eom; /* True if should set eom in header */ int cnt; /* # of bytes in this write */ int err = 0; - int s; cnt = pb->u.ioParams.reqCount - pb->u.ioParams.actCount; eom = pb->u.ioParams.eom ? 
F_EOM : 0; @@ -96,7 +95,6 @@ int FillSendQueue(sp, pb) /* (CCBPtr sp) */ } gbuf_cont(mb) = 0; - ATDISABLE(s, sp->lock); sp->sData = 1; /* note that there is data to send */ if ((mb = sp->csbuf_mb)) { /* add to the current message */ gbuf_linkb(mb, nmb); @@ -112,7 +110,6 @@ int FillSendQueue(sp, pb) /* (CCBPtr sp) */ sp->csbuf_mb = 0; /* if its done, no current buffer */ } pb->u.ioParams.actCount += cnt; /* Update count field in param blk */ - ATENABLE(s, sp->lock); if (pb->u.ioParams.actCount == pb->u.ioParams.reqCount) { /* Write is complete */ @@ -150,24 +147,20 @@ int adspWrite(sp, pb) /* (DSPPBPtr pb) */ CCBPtr sp; struct adspcmd *pb; { - int s; if (sp == 0) { pb->ioResult = errRefNum; return EINVAL; /* no stream, so drop the message */ } - ATDISABLE(s, sp->lock); if (sp->state != sOpen) { /* Not allowed */ pb->ioResult = errState; - ATENABLE(s, sp->lock); atalk_notify(sp->gref, ENOTCONN); gbuf_freem(pb->mp); return 0; } pb->u.ioParams.actCount = 0; /* Set # of bytes so far to zero */ - ATENABLE(s, sp->lock); FillSendQueue(sp, pb); /* Copy from write param block to send queue */ diff --git a/bsd/netat/adsp_attention.c b/bsd/netat/adsp_attention.c index 8a0510ac4..726692d45 100644 --- a/bsd/netat/adsp_attention.c +++ b/bsd/netat/adsp_attention.c @@ -72,7 +72,6 @@ */ int adspAttention(register struct adspcmd *pb, register CCBPtr sp) { - int s; register gbuf_t *mp, *nmp; unsigned char uerr; @@ -115,16 +114,13 @@ int adspAttention(register struct adspcmd *pb, register CCBPtr sp) } } pb->ioDirection = 1; /* outgoing attention data */ - ATDISABLE(s, sp->lock); if (sp->sapb) { /* Pending attentions already? 
*/ - qAddToEnd(&sp->sapb, pb); /* Just add to end of queue */ - ATENABLE(s, sp->lock); + qAddToEnd(&sp->sapb, pb); /* Just add to end of queue */ } else { - sp->sendAttnData = 1; /* Start off this attention */ - pb->qLink = 0; - sp->sapb = pb; - ATENABLE(s, sp->lock); - CheckSend(sp); + sp->sendAttnData = 1; /* Start off this attention */ + pb->qLink = 0; + sp->sapb = pb; + CheckSend(sp); } pb->ioResult = 1; /* indicate that the IO is not complete */ return 0; diff --git a/bsd/netat/adsp_internal.h b/bsd/netat/adsp_internal.h index e5a48476d..129a6197d 100644 --- a/bsd/netat/adsp_internal.h +++ b/bsd/netat/adsp_internal.h @@ -244,9 +244,6 @@ typedef struct ccb { ADSP_OPEN_DATA of; /* Holds the data for the open exchange */ gref_t *gref; /* The queue associated with the CCB */ gbuf_t *sp_mp; - atlock_t lock; - atlock_t lockClose; - atlock_t lockRemove; } CCB, *CCBPtr; diff --git a/bsd/netat/adsp_misc.c b/bsd/netat/adsp_misc.c index 8ee925709..2b4efb778 100644 --- a/bsd/netat/adsp_misc.c +++ b/bsd/netat/adsp_misc.c @@ -44,7 +44,6 @@ * Modified for MP, 1996 by Tuyen Nguyen * Modified, April 9, 1997 by Tuyen Nguyen for MacOSX. 
*/ -extern atlock_t adspgen_lock; struct qlink { @@ -124,17 +123,14 @@ void* qfind_m(qhead, match, compare_fnx) void *match; ProcPtr compare_fnx; { - int s; CCBPtr queue_item = qhead; - ATDISABLE(s, adspgen_lock); while (queue_item) { if ((*compare_fnx)(queue_item,match)) break; queue_item = queue_item->ccbLink; } - ATENABLE(s, adspgen_lock); return (queue_item); } diff --git a/bsd/netat/adsp_reset.c b/bsd/netat/adsp_reset.c index bcb738907..6f19c238e 100644 --- a/bsd/netat/adsp_reset.c +++ b/bsd/netat/adsp_reset.c @@ -72,10 +72,8 @@ int RXFReset(sp, f) /* (CCBPtr sp, ADSP_FRAMEPtr f) */ unsigned int hi; register gbuf_t *mp; register struct adspcmd *pb; - int s; - ATDISABLE(s, sp->lock); - pktFirstByteSeq = netdw(UAL_VALUE(f->pktFirstByteSeq)); + pktFirstByteSeq = UAL_VALUE_NTOH(f->pktFirstByteSeq); hi = sp->recvSeq + CalcRecvWdw(sp); @@ -114,7 +112,6 @@ int RXFReset(sp, f) /* (CCBPtr sp, ADSP_FRAMEPtr f) */ sp->callSend = 1; } - ATENABLE(s, sp->lock); return 0; } @@ -137,13 +134,11 @@ int RXFResetAck(sp, f) /* (CCBPtr sp, ADSP_FRAMEPtr f) */ ADSP_FRAMEPtr f; { unsigned int PktNextRecvSeq; - int s; if (sp->frpb == 0) /* Not expecting frwd reset Ack packet */ return 1; - ATDISABLE(s, sp->lock); - PktNextRecvSeq = netdw(UAL_VALUE(f->pktNextRecvSeq)); + PktNextRecvSeq = UAL_VALUE_NTOH(f->pktNextRecvSeq); if (BETWEEN(sp->sendSeq, PktNextRecvSeq, sp->sendWdwSeq+1)) { struct adspcmd *pb; @@ -170,7 +165,6 @@ int RXFResetAck(sp, f) /* (CCBPtr sp, ADSP_FRAMEPtr f) */ } } - ATENABLE(s, sp->lock); return 0; } @@ -193,7 +187,6 @@ int adspReset(sp, pb) /* (DSPPBPtr pb) */ CCBPtr sp; struct adspcmd *pb; { - int s; register gbuf_t *mp; register struct adspcmd *rpb; @@ -207,7 +200,6 @@ int adspReset(sp, pb) /* (DSPPBPtr pb) */ return EINVAL; } - ATDISABLE(s, sp->lock); while (mp = sp->sbuf_mb) { /* clear the send queue */ sp->sbuf_mb = gbuf_next(mp); @@ -235,7 +227,6 @@ int adspReset(sp, pb) /* (DSPPBPtr pb) */ * bookkeeping for it. yetch! 
*/ adspioc_ack(0, pb->ioc, pb->gref); } - ATENABLE(s, sp->lock); CheckSend(sp); return STR_IGNORE; diff --git a/bsd/netat/adsp_stream.c b/bsd/netat/adsp_stream.c index 13811b3a8..424eb65b1 100644 --- a/bsd/netat/adsp_stream.c +++ b/bsd/netat/adsp_stream.c @@ -66,8 +66,6 @@ unsigned char adspAssignSocket(); int adspallocate(), adsprelease(); int adspInited = 0; -atlock_t adspall_lock; -atlock_t adspgen_lock; GLOBAL adspGlobal; /**********/ @@ -86,17 +84,14 @@ void adsp_input(mp) gref_t *gref; CCBPtr sp; at_ddp_t *p; - int s, l; gbuf_t *mb; switch (gbuf_type(mp)) { case MSG_DATA: p = (at_ddp_t *)gbuf_rptr(mp); - ATDISABLE(s, adspall_lock); sp = adsp_inputQ[p->dst_socket]; if ((sp == 0) || (sp->gref==0) || (sp->state==sClosed)) { - ATENABLE(s, adspall_lock); gbuf_freem(mp); return; } @@ -109,7 +104,6 @@ void adsp_input(mp) } while ((sp = sp->otccbLink) != 0); if (sp == 0) { - ATENABLE(s, adspall_lock); gbuf_freem(mp); return; } @@ -121,12 +115,9 @@ void adsp_input(mp) gbuf_next(mb) = mp; } else sp->deferred_mb = mp; - ATENABLE(s, adspall_lock); return; } - ATDISABLE(l, sp->lockRemove); sp->lockFlag = 1; - ATENABLE(l, adspall_lock); while (mp) { adsp_rput(sp->gref, mp); if ((mp = sp->deferred_mb) != 0) { @@ -135,7 +126,6 @@ void adsp_input(mp) } } sp->lockFlag = 0; - ATENABLE(s, sp->lockRemove); return; case MSG_IOCACK: @@ -185,7 +175,7 @@ int adsp_readable(gref) int adsp_writeable(gref) gref_t *gref; { - int s, rc; + int rc; CCBPtr sp; if (gref->info == 0) @@ -201,9 +191,7 @@ int adsp_writeable(gref) return(1); sp = (CCBPtr)gbuf_rptr(((gbuf_t *)gref->info)); - ATDISABLE(s, sp->lock); rc = CalcSendQFree(sp); - ATENABLE(s, sp->lock); return rc; } @@ -229,7 +217,6 @@ int adsp_open(gref) gref_t *gref; { register CCBPtr sp; - int s; if (!adspInited) adsp_init(); @@ -240,28 +227,21 @@ int adsp_open(gref) sp = (CCBPtr)gbuf_rptr(((gbuf_t *)gref->info)); gref->readable = adsp_readable; gref->writeable = adsp_writeable; - ATDISABLE(s, adspall_lock); if ((sp->otccbLink = 
ccb_used_list) != 0) sp->otccbLink->ccbLink = sp; ccb_used_list = sp; - ATENABLE(s, adspall_lock); return 0; } int adsp_close(gref) gref_t *gref; { - int s, l; unsigned char localSocket; /* make sure we've not yet removed the CCB (e.g., due to TrashSession) */ - ATDISABLE(l, adspgen_lock); if (gref->info) { CCBPtr sp = (CCBPtr)gbuf_rptr(((gbuf_t *)gref->info)); - ATDISABLE(s, sp->lock); - ATENABLE(s, adspgen_lock); localSocket = sp->localSocket; - ATENABLE(l, sp->lock); if (localSocket) adspRelease(gref); else @@ -270,7 +250,6 @@ int adsp_close(gref) gbuf_freeb((gbuf_t *)gref->info); } } else - ATENABLE(l, adspgen_lock); return 0; } @@ -336,7 +315,6 @@ int adsp_wput(gref, mp) gbuf_t *mp; { int rc; - int s; gbuf_t *xm; ioc_t *iocbp; CCBPtr sp; @@ -358,17 +336,14 @@ int adsp_wput(gref, mp) adsp_iocnak(gref, mp, EINVAL); } v = *(unsigned char *)gbuf_rptr(gbuf_cont(mp)); - ATDISABLE(s, adspall_lock); if ( (v != 0) && ((v > DDP_SOCKET_LAST) || (v < 2) || ddp_socket_inuse(v, DDP_ADSP))) { - ATENABLE(s, adspall_lock); iocbp->ioc_rval = -1; adsp_iocnak(gref, mp, EINVAL); } else { if (v == 0) { - ATENABLE(s, adspall_lock); if ((v = adspAssignSocket(gref, 0)) == 0) { iocbp->ioc_rval = -1; adsp_iocnak(gref, mp, EINVAL); @@ -378,7 +353,6 @@ int adsp_wput(gref, mp) adsp_inputC[v] = 1; adsp_inputQ[v] = sp; adsp_pidM[v] = sp->pid; - ATENABLE(s, adspall_lock); adsp_dequeue_ccb(sp); } *(unsigned char *)gbuf_rptr(gbuf_cont(mp)) = v; @@ -450,9 +424,7 @@ int adsp_wput(gref, mp) if (!gref->info) gbuf_freem(mp); else { - ATDISABLE(s, sp->lockClose); rc = adspWriteHandler(gref, mp); - ATENABLE(s, sp->lockClose); switch (rc) { case STR_PUTNEXT: @@ -537,12 +509,10 @@ adspAssignSocket(gref, flag) { unsigned char sVal, sMax, sMin, sSav, inputC; CCBPtr sp; - int s; sMax = flag ? 
DDP_SOCKET_LAST-46 : DDP_SOCKET_LAST-6; sMin = DDP_SOCKET_1st_DYNAMIC; - ATDISABLE(s, adspall_lock); for (inputC=255, sVal=sMax; sVal >= sMin; sVal--) { if (!ddp_socket_inuse(sVal, DDP_ADSP)) break; @@ -557,22 +527,17 @@ adspAssignSocket(gref, flag) } } if (sVal < sMin) { - if (!flag || (inputC == 255)) { - ATENABLE(s, adspall_lock); + if (!flag || (inputC == 255)) return 0; - } sVal = sSav; } sp = (CCBPtr)gbuf_rptr(((gbuf_t *)gref->info)); - ATENABLE(s, adspall_lock); adsp_dequeue_ccb(sp); - ATDISABLE(s, adspall_lock); adsp_inputC[sVal]++; sp->otccbLink = adsp_inputQ[sVal]; adsp_inputQ[sVal] = sp; if (!flag) adsp_pidM[sVal] = sp->pid; - ATENABLE(s, adspall_lock); return sVal; } @@ -584,11 +549,9 @@ adspDeassignSocket(sp) CCBPtr curr_sp; CCBPtr prev_sp; int pid = 0; - int s, l; dPrintf(D_M_ADSP, D_L_TRACE, ("adspDeassignSocket: pid=%d,s=%d\n", sp->pid, sp->localSocket)); - ATDISABLE(s, adspall_lock); sVal = sp->localSocket; if ((curr_sp = adsp_inputQ[sVal]) != 0) { prev_sp = 0; @@ -597,12 +560,10 @@ adspDeassignSocket(sp) curr_sp = curr_sp->otccbLink; } if (curr_sp) { - ATDISABLE(l, sp->lockRemove); if (prev_sp) prev_sp->otccbLink = sp->otccbLink; else adsp_inputQ[sVal] = sp->otccbLink; - ATENABLE(l, sp->lockRemove); if (adsp_inputQ[sVal]) adsp_inputC[sVal]--; else { @@ -613,11 +574,9 @@ adspDeassignSocket(sp) sp->ccbLink = 0; sp->otccbLink = 0; sp->localSocket = 0; - ATENABLE(s, adspall_lock); return pid ? 
0 : 1; } } - ATENABLE(s, adspall_lock); dPrintf(D_M_ADSP, D_L_ERROR, ("adspDeassignSocket: closing, no CCB block, trouble ahead\n")); @@ -631,9 +590,7 @@ void adsp_dequeue_ccb(sp) CCB *sp; { - int s; - ATDISABLE(s, adspall_lock); if (sp == ccb_used_list) { if ((ccb_used_list = sp->otccbLink) != 0) sp->otccbLink->ccbLink = 0; @@ -644,7 +601,6 @@ adsp_dequeue_ccb(sp) sp->otccbLink = 0; sp->ccbLink = 0; - ATENABLE(s, adspall_lock); } void SndMsgUp(gref, mp) diff --git a/bsd/netat/appletalk.h b/bsd/netat/appletalk.h index 615164c37..496f8af55 100644 --- a/bsd/netat/appletalk.h +++ b/bsd/netat/appletalk.h @@ -72,10 +72,22 @@ struct atalk_addr { #define UAL_UAL(x,y) *(unsigned long *) &(x[0]) = *(unsigned long *) &(y[0]) #define UAL_VALUE(x) (*(unsigned long *) &(x[0])) +/* Macros to assign unaligned fields with byte swapping */ +#define UAS_ASSIGN_HTON(x,s) *(unsigned short *) &(x[0]) = htons((unsigned short) (s)) +#define UAS_ASSIGN_NTOH(x,s) *(unsigned short *) &(x[0]) = ntohs((unsigned short) (s)) +#define UAS_VALUE_HTON(x) htons((*(unsigned short *) &(x[0]))) +#define UAS_VALUE_NTOH(x) ntohs((*(unsigned short *) &(x[0]))) +#define UAL_ASSIGN_HTON(x,l) *(unsigned long *) &(x[0]) = htonl((unsigned long) (l)) +#define UAL_ASSIGN_NTOH(x,l) *(unsigned long *) &(x[0]) = ntohl((unsigned long) (l)) +#define UAL_VALUE_HTON(x) htonl((*(unsigned long *) &(x[0]))) +#define UAL_VALUE_NTOH(x) ntohl((*(unsigned long *) &(x[0]))) + /* Macros to manipulate at_net variables */ -#define NET_ASSIGN(x,s) *(unsigned short *)&(x[0]) = (unsigned short)(s) -#define NET_NET(x, y) *(unsigned short *)&(x[0]) = *(unsigned short *)&(y[0]) -#define NET_VALUE(x) (*(unsigned short *) &(x[0])) +#define NET_ASSIGN(x,s) *(unsigned short *)&(x[0]) = htons((unsigned short)(s)) +#define NET_ASSIGN_NOSWAP(x,s) *(unsigned short *)&(x[0]) = (unsigned short)(s) +#define NET_NET(x, y) *(unsigned short *)&(x[0]) = *(unsigned short *)&(y[0]) +#define NET_VALUE(x) ntohs((*(unsigned short *) &(x[0]))) +#define 
NET_VALUE_NOSWAP(x) (*(unsigned short *) &(x[0])) #define ATALK_ASSIGN(a, net, node, unused ) \ a.atalk_unused = unused; a.atalk_node = node; NET_ASSIGN(a.atalk_net, net) @@ -261,7 +273,6 @@ typedef struct { #define IFID_HOME 1 /* home port in ifID_table */ #define ATALK_VALUE(a) ((*(u_long *) &(a))&0x00ffffff) -#define ATALK_EQUAL(a, b) (ATALK_VALUE(a) == ATALK_VALUE(b)) #define VERSION_LENGTH 80 /* length of version string */ diff --git a/bsd/netat/asp_proto.c b/bsd/netat/asp_proto.c index 265de3369..d371f8630 100644 --- a/bsd/netat/asp_proto.c +++ b/bsd/netat/asp_proto.c @@ -115,7 +115,6 @@ static unsigned char scb_tmo_cnt; asp_scb_t *scb_used_list; static asp_scb_t *scb_tmo_list; asp_scb_t *scb_free_list; -atlock_t aspall_lock, asptmo_lock; int asp_readable(gref) @@ -141,7 +140,6 @@ asp_init() int asp_open(gref) gref_t *gref; { - int s; asp_scb_t *scb; /* @@ -166,11 +164,9 @@ int asp_open(gref) scb->gref = gref; scb->session_timer = DEF_SESS_TMO; scb->cmd_retry = asp_def_retry; - ATDISABLE(s, aspall_lock); if ((scb->next_scb = scb_used_list) != 0) scb->next_scb->prev_scb = scb; scb_used_list = scb; - ATENABLE(s, aspall_lock); /* * return success @@ -186,7 +182,6 @@ int asp_close(gref) gref_t *gref; { - int s; unsigned char sock_num; asp_scb_t *scb, *new_scb; gbuf_t *m; @@ -200,17 +195,14 @@ asp_close(gref) * send the CloseSess response to peer */ if (gbuf_type(scb->sess_ioc) != MSG_PROTO) { - ATDISABLE(s, scb->lock); m = scb->sess_ioc; scb->sess_ioc = gbuf_next(m); - ATENABLE(s, scb->lock); atp_send_rsp(scb->gref, m, TRUE); } } if (scb->atp_state) { sock_num = scb->loc_addr.socket; - ATDISABLE(s, aspall_lock); if ((scb->dflag != 1) && scb->stat_msg) { untimeout(atp_retry_req, scb->stat_msg); gbuf_freem(scb->stat_msg); @@ -219,7 +211,6 @@ asp_close(gref) if (asp_scbQ[sock_num]->next_scb == 0) { asp_scbQ[sock_num] = 0; asp_inpC[sock_num] = 0; - ATENABLE(s, aspall_lock); dPrintf(D_M_ASP, D_L_INFO, (" : atp_close(), loc=%d\n", scb->loc_addr.socket)); 
atp_close(gref, 0); @@ -237,7 +228,6 @@ asp_close(gref) scb->next_scb->prev_scb = scb->prev_scb; } scb->next_scb = 0; - ATENABLE(s, aspall_lock); } } else asp_dequeue_scb(scb); @@ -245,7 +235,6 @@ asp_close(gref) /* * free all allocated blocks if any */ - ATDISABLE(s, scb->lock); if (scb->stat_msg) { gbuf_freem(scb->stat_msg); scb->stat_msg = 0; @@ -260,7 +249,6 @@ asp_close(gref) } scb->rem_addr.node = 0; - ATENABLE(s, scb->lock); /* * stop all timers @@ -381,7 +369,7 @@ int asp_wput(gref, m) gref_t *gref; gbuf_t *m; { - int s, err; + int err; unsigned char sockSav, sock_num; gbuf_t *mioc, *mdata; ioc_t *iocbp; @@ -546,7 +534,6 @@ int asp_wput(gref, m) if ((sock_num = (at_socket)atp_bind(gref, 0, &sockSav)) == 0) { atp_close(gref, 0); asp_dequeue_scb(scb); - ATDISABLE(s, aspall_lock); sock_num = sockSav; scb->loc_addr.socket = sock_num; for (curr_scb = asp_scbQ[sock_num]; @@ -554,14 +541,11 @@ int asp_wput(gref, m) scb->prev_scb = curr_scb; curr_scb->next_scb = scb; scb->atp_state = curr_scb->atp_state; - ATENABLE(s, aspall_lock); } else { asp_dequeue_scb(scb); - ATDISABLE(s, aspall_lock); scb->loc_addr.socket = sock_num; asp_scbQ[sock_num] = scb; scb->atp_state->dflag = scb->dflag; - ATENABLE(s, aspall_lock); } gref->info = (void *)scb; asp_inpC[sock_num]++; @@ -602,7 +586,7 @@ int asp_wput(gref, m) scb->session_timer = open_cmd->SessionTimer; aw.func = ASPFUNC_OpenSess; aw.param1 = scb->loc_addr.socket; - aw.param2 = ASP_Version; + aw.param2 = htons(ASP_Version); scb->ioc_wait = (unsigned char)(iocbp->ioc_cmd & 0xff); iocbp->ioc_cmd = AT_ATP_ISSUE_REQUEST_DEF; asp_send_req(gref, mioc, &open_cmd->SLSEntityIdentifier, @@ -848,7 +832,6 @@ asp_accept(scb, sess_scb, m) asp_scb_t *sess_scb; gbuf_t *m; { - int s; gbuf_t *mdata; at_ddp_t *ddp; at_atp_t *atp; @@ -880,9 +863,7 @@ asp_accept(scb, sess_scb, m) awp->param1 = sess_scb->sess_id; awp->param2 = 0; gbuf_freeb(m); - ATDISABLE(s, scb->lock); scb->sess_ioc = gbuf_next(mdata); - ATENABLE(s, scb->lock); 
gbuf_next(mdata) = 0; asp_timout(asp_hangup, sess_scb, sess_scb->session_timer); atp_send_rsp(scb->gref, mdata, TRUE); @@ -911,11 +892,9 @@ void asp_clock_locked(arg) void asp_clock(arg) void *arg; { - int s; asp_scb_t *scb; void (*tmo_func)(); - ATDISABLE(s, asptmo_lock); if (scb_tmo_list) scb_tmo_list->tmo_delta--; while (((scb = scb_tmo_list) != 0) && (scb_tmo_list->tmo_delta == 0)) { @@ -923,12 +902,9 @@ void asp_clock(arg) scb_tmo_list->prev_tmo = 0; if ((tmo_func = scb->tmo_func) != 0) { scb->tmo_func = 0; - ATENABLE(s, asptmo_lock); (*tmo_func)(scb); - ATDISABLE(s, asptmo_lock); } } - ATENABLE(s, asptmo_lock); if (++scb_tmo_cnt == 0) scb_tmo_cnt++; timeout(asp_clock_locked, (void *)arg, (1<func = 0; awp->param1 = 0; - awp->param2 = (unsigned short)ASPERR_BadVersNum; + awp->param2 = htons((unsigned short)ASPERR_BadVersNum); dPrintf(D_M_ASP, D_L_INFO, (" : version=%d\n", ASPERR_BadVersNum)); @@ -1100,7 +1075,6 @@ asp_ack_reply(gref, mioc) /* * queue the connection request */ - ATDISABLE(s, scb->lock); gbuf_next(mdata) = 0; if ((m = scb->sess_ioc) == 0) { scb->sess_ioc = mdata; @@ -1113,7 +1087,6 @@ asp_ack_reply(gref, mioc) m = gbuf_next(m); gbuf_next(m) = mdata; } - ATENABLE(s, scb->lock); dPrintf(D_M_ASP, D_L_INFO, (" : QUEUE connect request\n")); @@ -1122,7 +1095,7 @@ asp_ack_reply(gref, mioc) case ASPFUNC_Command: case ASPFUNC_Write: if ( (scb->sess_id != awp->param1) - || (scb->rcv_seq_num != awp->param2) + || (scb->rcv_seq_num != ntohs(awp->param2)) || BAD_REMADDR(rem_addr) ) { char era[8], ra[8]; sprintf(era,"%d.%d", scb->rem_addr.node,scb->rem_addr.socket); @@ -1144,12 +1117,11 @@ asp_ack_reply(gref, mioc) command_ind = (asp_command_ind_t *)gbuf_rptr(mioc); command_ind->Primitive = (int)awp->func; command_ind->ReqRefNum = - *(unsigned short *)atp->tid; + ntohs(*(unsigned short *)atp->tid); command_ind->ReqType = awp->func; mdata = gbuf_strip(mdata); gbuf_cont(mioc) = mdata; - ATDISABLE(s, scb->lock); if (scb->req_flag) { if ((mx = scb->req_msgq) != 0) 
{ while (gbuf_next(mx)) @@ -1157,10 +1129,8 @@ asp_ack_reply(gref, mioc) gbuf_next(mx) = mioc; } else scb->req_msgq = mioc; - ATENABLE(s, scb->lock); } else { scb->req_flag = 1; - ATENABLE(s, scb->lock); asp_putnext(scb->gref, mioc); } goto l_done; @@ -1177,7 +1147,7 @@ asp_ack_reply(gref, mioc) command_ind = (asp_command_ind_t *)gbuf_rptr(mioc); command_ind->Primitive = (int)awp->func; command_ind->ReqRefNum = - *(unsigned short *)atp->tid; + ntohs(*(unsigned short *)atp->tid); command_ind->ReqType = awp->func; mdata = gbuf_strip(mdata); @@ -1246,12 +1216,10 @@ asp_ack_reply(gref, mioc) scb->rem_addr.socket)); gbuf_next(mdata) = 0; - ATDISABLE(s, scb->lock); if (scb->sess_ioc) gbuf_freel(scb->sess_ioc); scb->sess_ioc = mdata; scb->state = ASPSTATE_Close; - ATENABLE(s, scb->lock); /* * notify upstream of the CloseSess from peer @@ -1270,7 +1238,7 @@ asp_ack_reply(gref, mioc) command_ind = (asp_command_ind_t *)gbuf_rptr(mioc); command_ind->Primitive = (int)awp->func; command_ind->ReqRefNum = - *(unsigned short *)atp->tid; + ntohs(*(unsigned short *)atp->tid); command_ind->ReqType = awp->func; scb->attn_tid = *(unsigned short *)atp->tid; scb->attn_flag = 1; @@ -1316,12 +1284,10 @@ asp_ack_reply(gref, mioc) case ASPSTATE_Idle: scb->rem_addr.node = 0; gbuf_freem(mioc); - ATDISABLE(s, scb->lock); if (scb->get_wait) wakeup(&scb->event); else atalk_notify_sel(gref); - ATENABLE(s, scb->lock); return; case ASPSTATE_WaitingForGetStatusRsp: @@ -1390,7 +1356,7 @@ asp_ack_reply(gref, mioc) atpBDS = (struct atpBDS *)gbuf_rptr(mx); cmdreply_ind = (asp_cmdreply_ind_t *)gbuf_rptr(mioc); cmdreply_ind->Primitive = ASPFUNC_CmdReply; - cmdreply_ind->CmdResult = *(int *)atpBDS->bdsUserData; + cmdreply_ind->CmdResult = ntohl(*(int *)atpBDS->bdsUserData); gbuf_wset(mioc,sizeof(asp_cmdreply_ind_t)); gbuf_freeb(mx); asp_putnext(scb->gref, mioc); @@ -1586,9 +1552,7 @@ StaticProc void asp_dequeue_scb(scb) asp_scb_t *scb; { - int s; - ATDISABLE(s, aspall_lock); if (scb == scb_used_list) { if 
((scb_used_list = scb->next_scb) != 0) scb->next_scb->prev_scb = 0; @@ -1596,7 +1560,6 @@ asp_dequeue_scb(scb) if ((scb->prev_scb->next_scb = scb->next_scb) != 0) scb->next_scb->prev_scb = scb->prev_scb; } - ATENABLE(s, aspall_lock); scb->next_scb = 0; scb->prev_scb = 0; @@ -1610,11 +1573,9 @@ asp_find_scb(sock_num, rem_addr) unsigned char sock_num; at_inet_t *rem_addr; { - int s; asp_scb_t *scb; asp_scb_t *alt_scb = 0; - ATDISABLE(s, aspall_lock); for (scb = asp_scbQ[sock_num]; scb; scb = scb->next_scb) { if ((scb->rem_addr.net == rem_addr->net) && (scb->rem_addr.node == rem_addr->node)) { @@ -1634,7 +1595,6 @@ asp_find_scb(sock_num, rem_addr) rem_addr->node, rem_addr->socket)); } - ATENABLE(s, aspall_lock); return scb; } @@ -1648,7 +1608,6 @@ asp_timout(func, scb, seconds) register asp_scb_t *scb; int seconds; { - int s; unsigned char sum; register asp_scb_t *curr_scb, *prev_scb; @@ -1659,11 +1618,9 @@ asp_timout(func, scb, seconds) scb->tmo_delta = (seconds>>SESS_TMO_RES); scb->tmo_cnt = scb_tmo_cnt; - ATDISABLE(s, asptmo_lock); if (scb_tmo_list == 0) { scb->next_tmo = scb->prev_tmo = 0; scb_tmo_list = scb; - ATENABLE(s, asptmo_lock); return; } @@ -1697,7 +1654,6 @@ asp_timout(func, scb, seconds) scb_tmo_list->prev_tmo = scb; scb_tmo_list = scb; } - ATENABLE(s, asptmo_lock); } /* @@ -1708,12 +1664,10 @@ asp_untimout(func, scb) void (*func)(); register asp_scb_t *scb; { - int s; if ((scb->tmo_cnt == scb_tmo_cnt) || (scb->tmo_func == 0)) return; - ATDISABLE(s, asptmo_lock); if (scb_tmo_list == scb) { if ((scb_tmo_list = scb->next_tmo) != 0) { scb_tmo_list->prev_tmo = 0; @@ -1727,7 +1681,6 @@ asp_untimout(func, scb) scb->prev_tmo = 0; } scb->tmo_func = 0; - ATENABLE(s, asptmo_lock); } /* @@ -1742,7 +1695,6 @@ asp_hangup(scb) /* * set the state to Close */ - ATDISABLE(s, scb->lock); scb->state = ASPSTATE_Close; if (scb->tickle_tid) { atp_cancel_req(scb->gref, (unsigned int)scb->tickle_tid); @@ -1753,15 +1705,11 @@ asp_hangup(scb) * notify upstream of the hangup */ 
if (scb->rem_addr.node) { - if (scb->get_wait) { + if (scb->get_wait) wakeup(&scb->event); - ATENABLE(s, scb->lock); - } else { - ATENABLE(s, scb->lock); + else atalk_notify_sel(scb->gref); - } - } else - ATENABLE(s, scb->lock); + } } StaticProc void @@ -1804,17 +1752,13 @@ asp_iocnak(gref, mioc, err) StaticProc asp_scb_t * asp_scb_alloc() { - int s, i; + int i; gbuf_t *m; asp_scb_t *scb, *scb_array; - ATDISABLE(s, aspall_lock); if (scb_free_list == 0) { if ((m = gbuf_alloc(SCBS_PER_BLK*sizeof(asp_scb_t), PRI_MED)) == 0) - { - ATENABLE(s, aspall_lock); return (asp_scb_t *)0; - } bzero((char *)gbuf_rptr(m), SCBS_PER_BLK*sizeof(asp_scb_t)); gbuf_cont(m) = scb_resource_m; scb_resource_m = m; @@ -1827,9 +1771,6 @@ asp_scb_alloc() scb = scb_free_list; scb_free_list = scb->next_scb; - ATENABLE(s, aspall_lock); - ATLOCKINIT(scb->lock); - ATLOCKINIT(scb->delay_lock); ATEVENTINIT(scb->event); ATEVENTINIT(scb->delay_event); @@ -1843,13 +1784,10 @@ StaticProc void asp_scb_free(scb) asp_scb_t *scb; { - int s; bzero((char *)scb, sizeof(asp_scb_t)); - ATDISABLE(s, aspall_lock); scb->next_scb = scb_free_list; scb_free_list = scb; - ATENABLE(s, aspall_lock); } /* @@ -1860,7 +1798,6 @@ asp_putnext(gref, mproto) gref_t *gref; gbuf_t *mproto; { - int s; gbuf_t *m; asp_scb_t *scb; @@ -1869,7 +1806,6 @@ asp_putnext(gref, mproto) /* * queue the message. 
*/ - ATDISABLE(s, scb->lock); gbuf_next(mproto) = 0; if ((m = scb->sess_ioc) == 0) scb->sess_ioc = mproto; @@ -1882,14 +1818,11 @@ asp_putnext(gref, mproto) if (scb->rcv_cnt >= MAX_RCV_CNT) scb->snd_stop = 1; - if (scb->get_wait) { + if (scb->get_wait) wakeup(&scb->event); - ATENABLE(s, scb->lock); - } else if (mproto == scb->sess_ioc) { - ATENABLE(s, scb->lock); + else if (mproto == scb->sess_ioc) atalk_notify_sel(gref); - } else - ATENABLE(s, scb->lock); + } /* asp_putnext */ /* @@ -1913,7 +1846,7 @@ asp_putnext(gref, mproto) int ASPputmsg(gref_t *gref, strbuf_t *ctlptr, strbuf_t *datptr, gbuf_t *mreq, int flags, int *errp) { - int s, i, err, len, offset, remain, size, copy_len; + int i, err, len, offset, remain, size, copy_len; gbuf_t *mioc, *mdata, *mx, *m0; ioc_t *iocbp; strbuf_t ctlbuf; @@ -2097,7 +2030,7 @@ int ASPputmsg(gref_t *gref, strbuf_t *ctlptr, strbuf_t *datptr, gbuf_t *mreq, in awp = (asp_word_t *)atp->user_bytes; awp->func = (unsigned char)Primitive; awp->param1 = scb->sess_id; - awp->param2 = scb->snd_seq_num; + awp->param2 = htons(scb->snd_seq_num); iocbp->ioc_private = (void *)scb; iocbp->ioc_count = gbuf_len(mdata); iocbp->ioc_rval = 0; @@ -2106,7 +2039,6 @@ int ASPputmsg(gref_t *gref, strbuf_t *ctlptr, strbuf_t *datptr, gbuf_t *mreq, in /* * send the command/write/write_continue/attention request */ - ATDISABLE(s, scb->lock); switch (awp->func) { case ASPFUNC_Command: scb->state = ASPSTATE_WaitingForCommandRsp; @@ -2116,7 +2048,7 @@ int ASPputmsg(gref_t *gref, strbuf_t *ctlptr, strbuf_t *datptr, gbuf_t *mreq, in break; case ASPFUNC_WriteContinue: scb->state = ASPSTATE_WaitingForWriteContinueRsp; - awp->param2 = scb->wrt_seq_num; + awp->param2 = htons(scb->wrt_seq_num); break; case ASPFUNC_Attention: scb->state = ASPSTATE_WaitingForCommandRsp; @@ -2124,10 +2056,9 @@ int ASPputmsg(gref_t *gref, strbuf_t *ctlptr, strbuf_t *datptr, gbuf_t *mreq, in atp->xo_relt = 0; atp->bitmap = 0x01; gbuf_wdec(mdata,2); - awp->param2 = *(unsigned short 
*)gbuf_wptr(mdata); + awp->param2 = htons(*(unsigned short *)gbuf_wptr(mdata)); break; } - ATENABLE(s, scb->lock); dPrintf(D_M_ASP,D_L_INFO, ("ASPputmsg: %s, loc=%d, rem=%x.%x.%d\n", (awp->func == ASPFUNC_Command ? "CommandReq" : @@ -2140,17 +2071,14 @@ int ASPputmsg(gref_t *gref, strbuf_t *ctlptr, strbuf_t *datptr, gbuf_t *mreq, in case ASPFUNC_CmdReply: - ATDISABLE(s, scb->lock); if (scb->req_msgq) { mx = scb->req_msgq; scb->req_msgq = gbuf_next(mx); gbuf_next(mx) = 0; - ATENABLE(s, scb->lock); asp_putnext(scb->gref, mx); - } else { + } else scb->req_flag = 0; - ATENABLE(s, scb->lock); - } + result = primitives->CmdReplyReq.CmdResult; tid = primitives->CmdReplyReq.ReqRefNum; @@ -2168,7 +2096,7 @@ int ASPputmsg(gref_t *gref, strbuf_t *ctlptr, strbuf_t *datptr, gbuf_t *mreq, in ddp->dst_socket = scb->reply_socket; ddp->type = DDP_ATP; UAS_ASSIGN(ddp->checksum, 0); - UAS_ASSIGN(atp->tid, tid); + UAS_ASSIGN(atp->tid, htons(tid)); if (scb->attn_flag && (tid == scb->attn_tid)) { scb->attn_flag = 0; atp->xo = 0; @@ -2215,7 +2143,7 @@ int ASPputmsg(gref_t *gref, strbuf_t *ctlptr, strbuf_t *datptr, gbuf_t *mreq, in /* bms: make this callable from kernel. 
reply date is passed back as a mbuf chain in *mreply */ int ASPgetmsg(gref_t *gref, strbuf_t *ctlptr, strbuf_t *datptr, gbuf_t **mreply, int *flags, int *errp) { - int err, s, len, sum, rval; + int err, len, sum, rval; gbuf_t *mproto, *mdata; strbuf_t ctlbuf; strbuf_t datbuf; @@ -2231,11 +2159,8 @@ int ASPgetmsg(gref_t *gref, strbuf_t *ctlptr, strbuf_t *datptr, gbuf_t **mreply, return -1; } - ATDISABLE(s, scb->lock); - if (scb->state == ASPSTATE_Close) { - ATENABLE(s, scb->lock); + if (scb->state == ASPSTATE_Close) return 0; - } /* * get receive data @@ -2246,25 +2171,20 @@ int ASPgetmsg(gref_t *gref, strbuf_t *ctlptr, strbuf_t *datptr, gbuf_t **mreply, err = msleep(&scb->event, atalk_mutex, PSOCK | PCATCH, "aspgetmsg", 0); if (err != 0) { scb->get_wait = 0; - ATENABLE(s, scb->lock); *errp = err; return -1; } if (scb->state == ASPSTATE_Close) { scb->get_wait = 0; - ATENABLE(s, scb->lock); return 0; } } get_wait = scb->get_wait; scb->get_wait = 0; - if ((ctlptr == 0) && (datptr == 0)) { - ATENABLE(s, scb->lock); + if ((ctlptr == 0) && (datptr == 0)) return 0; - } scb->sess_ioc = gbuf_next(mproto); mdata = gbuf_cont(mproto); - ATENABLE(s, scb->lock); /* last remaining use of MSG_ERROR */ if (gbuf_type(mproto) == MSG_ERROR) { @@ -2289,10 +2209,8 @@ int ASPgetmsg(gref_t *gref, strbuf_t *ctlptr, strbuf_t *datptr, gbuf_t **mreply, goto l_err; } if ((datbuf.maxlen < 0) || (datbuf.maxlen < gbuf_msgsize(mdata))) { - ATDISABLE(s, scb->lock); gbuf_next(mproto) = scb->sess_ioc; scb->sess_ioc = mproto; - ATENABLE(s, scb->lock); return MOREDATA; } @@ -2302,13 +2220,9 @@ int ASPgetmsg(gref_t *gref, strbuf_t *ctlptr, strbuf_t *datptr, gbuf_t **mreply, * we're not supposed to dequeue messages in the Streams * head's read queue this way; but there is no better way. 
*/ - ATDISABLE(s, scb->lock); - if (scb->sess_ioc == 0) { - ATENABLE(s, scb->lock); - } else { - ATENABLE(s, scb->lock); + if (scb->sess_ioc != 0) atalk_notify_sel(gref); - } + } /* @@ -2373,21 +2287,17 @@ int ASPgetmsg(gref_t *gref, strbuf_t *ctlptr, strbuf_t *datptr, gbuf_t **mreply, gbuf_freem(mproto); } - ATDISABLE(s, scb->lock); if (scb->sess_ioc) scb->rcv_cnt--; else { scb->rcv_cnt = 0; scb->snd_stop = 0; } - ATENABLE(s, scb->lock); return rval; l_err: - ATDISABLE(s, scb->lock); gbuf_next(mproto) = scb->sess_ioc; scb->sess_ioc = mproto; - ATENABLE(s, scb->lock); *errp = err; return -1; } diff --git a/bsd/netat/at.c b/bsd/netat/at.c index 3909aa1d9..65039b6a4 100644 --- a/bsd/netat/at.c +++ b/bsd/netat/at.c @@ -71,8 +71,7 @@ struct etalk_addr ttalk_multicast_addr = { {0xC0, 0x00, 0x40, 0x00, 0x00, 0x00}}; /* called only in router mode */ -static int set_zones(ifz) - zone_usage_t *ifz; +static int set_zones(zone_usage_t *ifz) /* 1. adds zone to table 2. looks up each route entry from zone list @@ -141,7 +140,6 @@ at_control(so, cmd, data, ifp) if ((cmd & 0xffff) == 0xff99) { u_long fixed_command; - char ioctl_buffer[32]; /* *** this is a temporary hack to get at_send_to_dev() to work with BSD-style sockets instead of the special purpose system calls, ATsocket() and ATioctl(). @@ -386,13 +384,11 @@ at_control(so, cmd, data, ifp) /* Normal case; no tuple found for this name, so insert * this tuple in the registry and return ok response. */ - ATDISABLE(nve_lock, NVE_LOCK); if ((error2 = nbp_new_nve_entry(&nve, ifID)) == 0) { nbpP->addr.net = ifID->ifThisNode.s_net; nbpP->addr.node = ifID->ifThisNode.s_node; nbpP->unique_nbp_id = nve.unique_nbp_id; } - ATENABLE(nve_lock, NVE_LOCK); return(error2); break; @@ -408,16 +404,13 @@ at_control(so, cmd, data, ifp) /* delete by id */ if (nbpP->unique_nbp_id) { - ATDISABLE(nve_lock, NVE_LOCK); TAILQ_FOREACH(nve_entry, &name_registry, nve_link) { if (nve_entry->unique_nbp_id == nbpP->unique_nbp_id) { /* Found a match! 
*/ nbp_delete_entry(nve_entry); - ATENABLE(nve_lock, NVE_LOCK); return(0); } } - ATENABLE(nve_lock, NVE_LOCK); return(EADDRNOTAVAIL); } @@ -437,9 +430,7 @@ at_control(so, cmd, data, ifp) if ((nve_entry = nbp_find_nve(&nve)) == NULL) continue; - ATDISABLE(nve_lock, NVE_LOCK); nbp_delete_entry(nve_entry); - ATENABLE(nve_lock, NVE_LOCK); found = TRUE; } if (found) @@ -455,9 +446,7 @@ at_control(so, cmd, data, ifp) /* Normal case; tuple found for this name, so delete * the entry from the registry and return ok response. */ - ATDISABLE(nve_lock, NVE_LOCK); nbp_delete_entry(nve_entry); - ATENABLE(nve_lock, NVE_LOCK); return(0); break; @@ -665,21 +654,17 @@ at_control(so, cmd, data, ifp) #endif case SIOCSETOT: { - int s; struct atpcb *at_pcb, *clonedat_pcb; int cloned_fd = *(int *)data; - s = splnet(); /* XXX */ at_pcb = sotoatpcb(so); /* let's make sure it's either -1 or a valid file descriptor */ if (cloned_fd != -1) { struct socket *cloned_so; error = file_socket(cloned_fd, &cloned_so); - if (error){ - splx(s); /* XXX */ + if (error) break; - } clonedat_pcb = sotoatpcb(cloned_so); } else { clonedat_pcb = NULL; @@ -690,7 +675,6 @@ at_control(so, cmd, data, ifp) } else { at_pcb->ddp_flags = clonedat_pcb->ddp_flags; } - splx(s); /* XXX */ file_drop(cloned_fd); break; } diff --git a/bsd/netat/at_aarp.h b/bsd/netat/at_aarp.h index 61bd23db8..7abb25815 100644 --- a/bsd/netat/at_aarp.h +++ b/bsd/netat/at_aarp.h @@ -103,7 +103,7 @@ typedef struct { /*************************************************/ typedef struct { - struct atalk_addr dest_at_addr; + struct atalk_addr dest_at_addr; /* net# in network byte order */ struct etalk_addr dest_addr; char dummy[2]; /* pad out to struct size of 32 */ time_t last_time; /* the last time that this addr @@ -128,19 +128,22 @@ typedef struct { #define AMT_HASH(a) \ ((NET_VALUE(((struct atalk_addr *)&a)->atalk_net) + ((struct atalk_addr *)&a)->atalk_node) % AMT_NB) +/* at_addr - net # in network byte order */ #define AMT_LOOK(at, at_addr, 
elapp) { \ register n; \ at = &aarp_table[elapp->ifPort]->et_aarp_amt[AMT_HASH(at_addr) * AMT_BSIZ]; \ for (n = 0 ; ; at++) { \ - if (ATALK_EQUAL(at->dest_at_addr, at_addr)) \ + if (at->dest_at_addr.atalk_node == (at_addr).atalk_node && \ + NET_EQUAL(at->dest_at_addr.atalk_net, (at_addr).atalk_net)) \ break; \ if (++n >= AMT_BSIZ) { \ at = NULL; \ break; \ } \ } \ - } +} +/* at_addr - net # in network byte order */ #define NEW_AMT(at, at_addr, elapp) { \ register n; \ register aarp_amt_t *myat; \ @@ -153,7 +156,7 @@ typedef struct { break; \ } \ } \ - } +} #define AARP_NET_MCAST(p, elapp) \ (NET_VALUE((p)->dst_net) == elapp->ifThisNode.s_net) \ diff --git a/bsd/netat/at_pcb.c b/bsd/netat/at_pcb.c index 09b6801e2..4594fccc5 100644 --- a/bsd/netat/at_pcb.c +++ b/bsd/netat/at_pcb.c @@ -183,7 +183,6 @@ int at_pcbbind(pcb, nam) register struct atpcb *pcb; struct sockaddr *nam; { - register struct socket *so = pcb->atpcb_socket; register struct sockaddr_at *local = (struct sockaddr_at *) nam; u_char ddpsock = local->sat_port; diff --git a/bsd/netat/at_proto.c b/bsd/netat/at_proto.c index d0e0934e9..74e1808e6 100644 --- a/bsd/netat/at_proto.c +++ b/bsd/netat/at_proto.c @@ -71,7 +71,6 @@ extern int ddp_pru_send(struct socket *so, int flags, struct mbuf *m, extern int ddp_pru_shutdown(struct socket *so); extern int ddp_pru_sockaddr(struct socket *so, struct sockaddr **nam); -void atalk_dominit(); /* * Dummy usrreqs struct created by Ted for FreeBSD 3.x integration. 
@@ -85,31 +84,45 @@ struct pr_usrreqs ddp_usrreqs = { ddp_pru_sockaddr, sosend, soreceive, pru_sopoll_notsupp }; -struct domain atalkdomain; +extern struct domain atalkdomain; +extern void atalk_dominit(void); + struct protosw atalksw[] = { { SOCK_RAW, &atalkdomain, /*protocol*/ 0, PR_ATOMIC|PR_ADDR, /*input*/ 0, /*output*/ 0, /*clinput*/ 0, ddp_ctloutput, /*ousrreq*/ 0, ddp_init, /*fastto*/ 0, /*slowto*/ 0, /*drain*/ 0, /*sysctl*/ 0, &ddp_usrreqs, - 0, 0, 0 + 0, 0, 0, /*lock, unlock, getlock */ + {0, 0}, 0, {0} /* filters */ } }; struct domain atalkdomain = -{ AF_APPLETALK, "appletalk", atalk_dominit, 0, 0, - atalksw, 0, - 0, 0, 0, - DDP_X_HDR_SIZE, 0 +{ AF_APPLETALK, + "appletalk", + atalk_dominit, + 0, + 0, + atalksw, + 0, + 0, /* dom_rtattach */ + 0, 0, /* dom_rtoffset, dom_maxrtkey */ + DDP_X_HDR_SIZE, 0, + 0, /* domain global mutex */ + 0, /* domain flags */ + {0, 0} /*reserved[2] */ }; struct domain * atalkdom = &atalkdomain; lck_mtx_t *atalk_mutex = NULL; +static int at_saved_lock, at_saved_unlock; + SYSCTL_NODE(_net, PF_APPLETALK, appletalk, CTLFLAG_RW, 0, "AppleTalk Family"); void -atalk_dominit() +atalk_dominit(void) { atalk_mutex = atalkdom->dom_mtx; } @@ -117,24 +130,22 @@ atalk_dominit() void atalk_lock() { - int error = 0, lr, lr_saved; -#ifdef __ppc__ - __asm__ volatile("mflr %0" : "=r" (lr)); - lr_saved = lr; -#endif + int lr_saved; + lr_saved = (unsigned int) __builtin_return_address(0); + lck_mtx_assert(atalkdom->dom_mtx, LCK_MTX_ASSERT_NOTOWNED); lck_mtx_lock(atalkdom->dom_mtx); + at_saved_lock = lr_saved; } void atalk_unlock() { - int error = 0, lr, lr_saved; -#ifdef __ppc__ - __asm__ volatile("mflr %0" : "=r" (lr)); - lr_saved = lr; -#endif + int lr_saved; + lr_saved = (unsigned int) __builtin_return_address(0); + lck_mtx_assert(atalkdom->dom_mtx, LCK_MTX_ASSERT_OWNED); + at_saved_unlock = lr_saved; lck_mtx_unlock(atalkdom->dom_mtx); } diff --git a/bsd/netat/at_var.h b/bsd/netat/at_var.h index 9eb7c5795..fdb445d3b 100644 --- 
a/bsd/netat/at_var.h +++ b/bsd/netat/at_var.h @@ -310,8 +310,8 @@ struct kev_atalk_data { void atalk_post_msg(struct ifnet *ifp, u_long event_code, struct at_addr *address, at_nvestr_t *zone); void aarp_sched_probe(void *); -void atalk_lock(); -void atalk_unlock(); +void atalk_lock(void); +void atalk_unlock(void); #endif /* KERNEL_PRIVATE */ #endif /* __APPLE_API_OBSOLETE */ diff --git a/bsd/netat/atp.h b/bsd/netat/atp.h index 421c265f0..87a474a89 100644 --- a/bsd/netat/atp.h +++ b/bsd/netat/atp.h @@ -79,13 +79,22 @@ #define ATP_XO_8MIN 4 typedef struct { +#if BYTE_ORDER == BIG_ENDIAN unsigned cmd : 2, xo : 1, eom : 1, sts : 1, xo_relt : 3; +#endif +#if BYTE_ORDER == LITTLE_ENDIAN + unsigned xo_relt : 3, + sts : 1, + eom : 1, + xo : 1, + cmd : 2; +#endif u_char bitmap; - ua_short tid; + ua_short tid; ua_long user_bytes; u_char data[ATP_DATA_SIZE]; } at_atp_t; diff --git a/bsd/netat/atp_alloc.c b/bsd/netat/atp_alloc.c index 98ebd3d46..783a64ab9 100644 --- a/bsd/netat/atp_alloc.c +++ b/bsd/netat/atp_alloc.c @@ -51,28 +51,23 @@ #define TRPS_PER_BLK 16 gbuf_t *atp_resource_m = 0; -extern atlock_t atpgen_lock; extern caddr_t atp_free_cluster_list; extern void atp_delete_free_clusters(); struct atp_trans *atp_trans_alloc(atp) struct atp_state *atp; { - int s; int i; gbuf_t *m; register struct atp_trans *trp, *trp_array; - ATDISABLE(s, atpgen_lock); if (atp_trans_free_list == 0) { - ATENABLE(s, atpgen_lock); if ((m = gbuf_alloc(TRPS_PER_BLK*sizeof(struct atp_trans),PRI_HI)) == 0) return (struct atp_trans *)0; bzero(gbuf_rptr(m), TRPS_PER_BLK*sizeof(struct atp_trans)); trp_array = (struct atp_trans *)gbuf_rptr(m); for (i=0; i < TRPS_PER_BLK-1; i++) trp_array[i].tr_list.next = (struct atp_trans *)&trp_array[i+1]; - ATDISABLE(s, atpgen_lock); gbuf_cont(m) = atp_resource_m; atp_resource_m = m; trp_array[i].tr_list.next = atp_trans_free_list; @@ -81,11 +76,9 @@ struct atp_state *atp; trp = atp_trans_free_list; atp_trans_free_list = trp->tr_list.next; - ATENABLE(s, 
atpgen_lock); trp->tr_queue = atp; trp->tr_state = TRANS_TIMEOUT; trp->tr_local_node = 0; - ATLOCKINIT(trp->tr_lock); ATEVENTINIT(trp->tr_event); dPrintf(D_M_ATP_LOW, D_L_TRACE, @@ -102,13 +95,10 @@ struct atp_state *atp; void atp_trans_free(trp) register struct atp_trans *trp; { - int s; - ATDISABLE(s, atpgen_lock); trp->tr_queue = 0; trp->tr_list.next = atp_trans_free_list; atp_trans_free_list = trp; - ATENABLE(s, atpgen_lock); } /* @@ -121,16 +111,13 @@ struct atp_rcb *atp_rcb_alloc(atp) struct atp_state *atp; { register struct atp_rcb *rcbp; - int s; - ATDISABLE(s, atpgen_lock); if ((rcbp = atp_rcb_free_list) != NULL) { atp_rcb_free_list = rcbp->rc_list.next; rcbp->rc_queue = atp; rcbp->rc_pktcnt = 0; rcbp->rc_local_node = 0; } - ATENABLE(s, atpgen_lock); dPrintf(D_M_ATP_LOW, D_L_TRACE, ("atp_rcb_alloc: allocated rcbp 0x%x\n", (u_int) rcbp)); return(rcbp); @@ -147,14 +134,11 @@ register struct atp_rcb *rcbp; register struct atp_state *atp; register int i; register int rc_state; - int s; dPrintf(D_M_ATP_LOW, D_L_TRACE, ("atp_rcb_free: freeing rcbp 0x%x\n", (u_int) rcbp)); - ATDISABLE(s, atpgen_lock); atp = rcbp->rc_queue; if ((rc_state = rcbp->rc_state) == -1) { - ATENABLE(s, atpgen_lock); dPrintf(D_M_ATP, D_L_WARNING, ("atp_rcb_free(%d): tid=%d,loc=%d,rem=%d\n", 0, rcbp->rc_tid, @@ -195,5 +179,4 @@ register struct atp_rcb *rcbp; } rcbp->rc_list.next = atp_rcb_free_list; atp_rcb_free_list = rcbp; - ATENABLE(s, atpgen_lock); } diff --git a/bsd/netat/atp_misc.c b/bsd/netat/atp_misc.c index 91fecf154..5c70dd94d 100644 --- a/bsd/netat/atp_misc.c +++ b/bsd/netat/atp_misc.c @@ -48,7 +48,6 @@ #include #include -extern atlock_t atpgen_lock; void atp_free(); void atp_send(struct atp_trans *); @@ -61,7 +60,6 @@ void atp_req_timeout(trp) register struct atp_trans *trp; { - int s; register gbuf_t *m; gref_t *gref; struct atp_state *atp; @@ -69,19 +67,15 @@ register struct atp_trans *trp; if ((atp = trp->tr_queue) == 0) return; - ATDISABLE(s, atp->atp_lock); - if 
(atp->atp_flags & ATP_CLOSING) { - ATENABLE(s, atp->atp_lock); + if (atp->atp_flags & ATP_CLOSING) return; - } + for (ctrp = atp->atp_trans_wait.head; ctrp; ctrp = ctrp->tr_list.next) { if (ctrp == trp) break; } - if (ctrp != trp) { - ATENABLE(s, atp->atp_lock); + if (ctrp != trp) return; - } if ((m = gbuf_cont(trp->tr_xmt)) == NULL) m = trp->tr_xmt; /* issued via the new interface */ @@ -95,7 +89,6 @@ register struct atp_trans *trp; *gbuf_rptr(m) = 99; gbuf_set_type(m, MSG_DATA); gref = trp->tr_queue->atp_gref; - ATENABLE(s, atp->atp_lock); atalk_putnext(gref, m); return; @@ -109,13 +102,11 @@ register struct atp_trans *trp; if (trp->tr_queue->dflag) ((ioc_t *)gbuf_rptr(m))->ioc_cmd = AT_ATP_REQUEST_COMPLETE; else if (trp->tr_bdsp == NULL) { - ATENABLE(s, atp->atp_lock); gbuf_freem(m); if (trp->tr_rsp_wait) wakeup(&trp->tr_event); break; } - ATENABLE(s, atp->atp_lock); atp_iocnak(trp->tr_queue, m, ETIMEDOUT); atp_free(trp); return; @@ -131,7 +122,6 @@ register struct atp_trans *trp; if (trp->tr_retry != (unsigned int) ATP_INFINITE_RETRIES) trp->tr_retry--; - ATENABLE(s, atp->atp_lock); atp_send(trp); } } @@ -148,12 +138,10 @@ register struct atp_trans *trp; { register struct atp_state *atp; register int i; - int s; dPrintf(D_M_ATP_LOW, D_L_TRACE, ("atp_free: freeing trp 0x%x\n", (u_int) trp)); - ATDISABLE(s, atpgen_lock); if (trp->tr_state == TRANS_ABORTING) { ATP_Q_REMOVE(atp_trans_abort, trp, tr_list); @@ -185,12 +173,10 @@ register struct atp_trans *trp; trp->tr_state = TRANS_ABORTING; ATP_Q_APPEND(atp_trans_abort, trp, tr_list); wakeup(&trp->tr_event); - ATENABLE(s, atpgen_lock); return; } } - ATENABLE(s, atpgen_lock); atp_trans_free(trp); } /* atp_free */ @@ -245,10 +231,8 @@ register struct atp_rcb *rcbp; { register struct atp_state *atp; register int i; - int s; if ((atp = rcbp->rc_queue) != 0) { - ATDISABLE(s, atp->atp_lock); for (i = 0; i < rcbp->rc_pktcnt; i++) { if (rcbp->rc_bitmap&atp_mask[i]) rcbp->rc_snd[i] = 1; @@ -258,10 +242,8 @@ register struct 
atp_rcb *rcbp; if (rcbp->rc_rep_waiting == 0) { rcbp->rc_state = RCB_SENDING; rcbp->rc_rep_waiting = 1; - ATENABLE(s, atp->atp_lock); atp_send_replies(atp, rcbp); - } else - ATENABLE(s, atp->atp_lock); + } } } @@ -272,26 +254,22 @@ register struct atp_rcb *rcbp; void atp_rcb_timer() { - int s; - register struct atp_rcb *rcbp; + register struct atp_rcb *rcbp; register struct atp_rcb *next_rcbp; extern struct atp_rcb_qhead atp_need_rel; extern struct atp_trans *trp_tmo_rcb; struct timeval timenow; l_again: - ATDISABLE(s, atpgen_lock); getmicrouptime(&timenow); for (rcbp = atp_need_rel.head; rcbp; rcbp = next_rcbp) { next_rcbp = rcbp->rc_tlist.next; - if (abs(timenow.tv_sec - rcbp->rc_timestamp) > 30) { - ATENABLE(s, atpgen_lock); + if ((timenow.tv_sec - rcbp->rc_timestamp) > 30) { atp_rcb_free(rcbp); goto l_again; } } - ATENABLE(s, atpgen_lock); atp_timout(atp_rcb_timer, trp_tmo_rcb, 10 * HZ); } @@ -342,9 +320,7 @@ register struct atp_state *atp; { register int i; register struct atp_trans *trp; - int s; - ATDISABLE(s, atpgen_lock); for (i = lasttid;;) { i = (i+1)&0xffff; @@ -354,7 +330,6 @@ register struct atp_state *atp; } if (trp == NULL) { lasttid = i; - ATENABLE(s, atpgen_lock); return(i); } } diff --git a/bsd/netat/atp_open.c b/bsd/netat/atp_open.c index 8f07c0332..16f2a8eb0 100644 --- a/bsd/netat/atp_open.c +++ b/bsd/netat/atp_open.c @@ -58,9 +58,6 @@ int atp_inited = 0; struct atp_rcb_qhead atp_need_rel; -atlock_t atpall_lock; -atlock_t atptmo_lock; -atlock_t atpgen_lock; /**********/ int atp_pidM[256]; @@ -130,7 +127,7 @@ int atp_open(gref, flag) int flag; { register struct atp_state *atp; - register int s, i; + register int i; vm_offset_t temp; /* @@ -170,18 +167,14 @@ int atp_open(gref, flag) * If no atp structure available return failure */ - ATDISABLE(s, atpall_lock); - if ((atp = atp_free_list) == NULL) { - ATENABLE(s, atpall_lock); + if ((atp = atp_free_list) == NULL) return(EAGAIN); - } /* * Update free list */ atp_free_list = atp->atp_trans_waiting; 
- ATENABLE(s, atpall_lock); /* * Initialize the data structure @@ -199,8 +192,6 @@ int atp_open(gref, flag) atp->atp_socket_no = -1; atp->atp_pid = gref->pid; atp->atp_msgq = 0; - ATLOCKINIT(atp->atp_lock); - ATLOCKINIT(atp->atp_delay_lock); ATEVENTINIT(atp->atp_event); ATEVENTINIT(atp->atp_delay_event); gref->info = (void *)atp; @@ -210,11 +201,9 @@ int atp_open(gref, flag) */ if (flag) { - ATDISABLE(s, atpall_lock); if ((atp->atp_trans_waiting = atp_used_list) != 0) atp->atp_trans_waiting->atp_rcb_waiting = atp; atp_used_list = atp; - ATENABLE(s, atpall_lock); } return(0); } @@ -232,7 +221,6 @@ int atp_close(gref, flag) register struct atp_state *atp; register struct atp_trans *trp; register struct atp_rcb *rcbp; - register int s; int socket; pid_t pid; @@ -244,7 +232,6 @@ int atp_close(gref, flag) atp->atp_msgq = 0; } - ATDISABLE(s, atp->atp_lock); atp->atp_flags |= ATP_CLOSING; socket = atp->atp_socket_no; if (socket != -1) @@ -265,7 +252,6 @@ int atp_close(gref, flag) atp_rcb_free(rcbp); while ((rcbp = atp->atp_attached.head)) atp_rcb_free(rcbp); - ATENABLE(s, atp->atp_lock); if (flag && (socket == -1)) atp_dequeue_atp(atp); @@ -273,11 +259,9 @@ int atp_close(gref, flag) /* * free the state variable */ - ATDISABLE(s, atpall_lock); atp->atp_socket_no = -1; atp->atp_trans_waiting = atp_free_list; atp_free_list = atp; - ATENABLE(s, atpall_lock); if (socket != -1) { pid = (pid_t)atp_pidM[socket]; diff --git a/bsd/netat/atp_read.c b/bsd/netat/atp_read.c index 25690a3c6..aacef82b2 100644 --- a/bsd/netat/atp_read.c +++ b/bsd/netat/atp_read.c @@ -94,9 +94,10 @@ gbuf_t *m; { register at_atp_t *athp; register struct atp_state *atp; - register int s, s_gen; + register int s_gen; gbuf_t *m_asp = NULL; struct timeval timenow; + u_short temp_net; atp = (struct atp_state *)gref->info; if (atp->dflag) @@ -131,15 +132,14 @@ gbuf_t *m; { register struct atp_trans *trp; register int seqno; - register at_ddp_t *ddp; + register at_ddp_t *ddp; /* * we just got a response, find the 
trans record */ - ATDISABLE(s, atp->atp_lock); for (trp = atp->atp_trans_wait.head; trp; trp = trp->tr_list.next) { - if (trp->tr_tid == UAS_VALUE(athp->tid)) + if (trp->tr_tid == UAS_VALUE_NTOH(athp->tid)) break; } @@ -148,12 +148,11 @@ gbuf_t *m; */ seqno = athp->bitmap; if (trp == NULL) { - ATENABLE(s, atp->atp_lock); ddp = AT_DDP_HDR(m); dPrintf(D_M_ATP_LOW, (D_L_INPUT|D_L_ERROR), ("atp_rput: dropping TRESP, no trp,tid=%d,loc=%d,rem=%d.%d,seqno=%d\n", - UAS_VALUE(athp->tid), - ddp->dst_socket,ddp->src_node,ddp->src_socket,seqno)); + UAS_VALUE_NTOH(athp->tid), + ddp->dst_socket, ddp->src_node, ddp->src_socket, seqno)); gbuf_freem(m); return; } @@ -162,11 +161,10 @@ gbuf_t *m; * If no longer valid, drop it */ if (trp->tr_state == TRANS_FAILED) { - ATENABLE(s, atp->atp_lock); ddp = AT_DDP_HDR(m); dPrintf(D_M_ATP_LOW, (D_L_INPUT|D_L_ERROR), ("atp_rput: dropping TRESP, failed trp,tid=%d,loc=%d,rem=%d.%d\n", - UAS_VALUE(athp->tid), + UAS_VALUE_NTOH(athp->tid), ddp->dst_socket, ddp->src_node, ddp->src_socket)); gbuf_freem(m); return; @@ -176,11 +174,10 @@ gbuf_t *m; * If we have already received it, ignore it */ if (!(trp->tr_bitmap&atp_mask[seqno]) || trp->tr_rcv[seqno]) { - ATENABLE(s, atp->atp_lock); ddp = AT_DDP_HDR(m); dPrintf(D_M_ATP_LOW, (D_L_INPUT|D_L_ERROR), ("atp_rput: dropping TRESP, duplicate,tid=%d,loc=%d,rem=%d.%d,seqno=%d\n", - UAS_VALUE(athp->tid), + UAS_VALUE_NTOH(athp->tid), ddp->dst_socket, ddp->src_node, ddp->src_socket, seqno)); gbuf_freem(m); return; @@ -211,7 +208,6 @@ gbuf_t *m; * the message to the user */ if (trp->tr_bitmap == 0) { - ATENABLE(s, atp->atp_lock); /* * Cancel the request timer and any @@ -229,12 +225,10 @@ gbuf_t *m; /* * If they want treq again, send them */ - ATENABLE(s, atp->atp_lock); atp_untimout(atp_req_timeout, trp); atp_send(trp); return; } - ATENABLE(s, atp->atp_lock); return; } @@ -247,9 +241,8 @@ gbuf_t *m; */ ddp = AT_DDP_HDR(m); - ATDISABLE(s, atp->atp_lock); for (rcbp = atp->atp_rcb.head; rcbp; rcbp = 
rcbp->rc_list.next) { - if (rcbp->rc_tid == UAS_VALUE(athp->tid) && + if (rcbp->rc_tid == UAS_VALUE_NTOH(athp->tid) && rcbp->rc_socket.node == ddp->src_node && rcbp->rc_socket.net == NET_VALUE(ddp->src_net) && rcbp->rc_socket.socket == ddp->src_socket) { @@ -263,14 +256,11 @@ gbuf_t *m; { ddp = 0; atp_rcb_free(rcbp); - ATENABLE(s, atp->atp_lock); } break; } } - if (ddp) - ATENABLE(s, atp->atp_lock); gbuf_freem(m); return; } @@ -289,9 +279,8 @@ gbuf_t *m; */ ddp = AT_DDP_HDR(m); - ATDISABLE(s, atp->atp_lock); for (rcbp = atp->atp_rcb.head; rcbp; rcbp = rcbp->rc_list.next) { - if (rcbp->rc_tid == UAS_VALUE(athp->tid) && + if (rcbp->rc_tid == UAS_VALUE_NTOH(athp->tid) && rcbp->rc_socket.node == ddp->src_node && rcbp->rc_socket.net == NET_VALUE(ddp->src_net) && rcbp->rc_socket.socket == ddp->src_socket) @@ -310,11 +299,10 @@ gbuf_t *m; */ /* we just did this, why do again? -jjs 4-10-95 */ for (rcbp = atp->atp_attached.head; rcbp; rcbp = rcbp->rc_list.next) { - if (rcbp->rc_tid == UAS_VALUE(athp->tid) && + if (rcbp->rc_tid == UAS_VALUE_NTOH(athp->tid) && rcbp->rc_socket.node == ddp->src_node && rcbp->rc_socket.net == NET_VALUE(ddp->src_net) && rcbp->rc_socket.socket == ddp->src_socket) { - ATENABLE(s, atp->atp_lock); gbuf_freem(m); dPrintf(D_M_ATP_LOW, D_L_INPUT, ("atp_rput: dropping TREQ, matches req queue\n")); @@ -326,20 +314,19 @@ gbuf_t *m; * assume someone is interested in * in an asynchronous incoming request */ - ATENABLE(s, atp->atp_lock); if ((rcbp = atp_rcb_alloc(atp)) == NULL) { gbuf_freem(m); return; } rcbp->rc_state = RCB_UNQUEUED; - ATDISABLE(s, atp->atp_lock); rcbp->rc_local_node = ddp->dst_node; - NET_NET(rcbp->rc_local_net, ddp->dst_net); + temp_net = NET_VALUE(ddp->dst_net); + NET_ASSIGN_NOSWAP(rcbp->rc_local_net, temp_net); rcbp->rc_socket.socket = ddp->src_socket; rcbp->rc_socket.node = ddp->src_node; rcbp->rc_socket.net = NET_VALUE(ddp->src_net); - rcbp->rc_tid = UAS_VALUE(athp->tid); + rcbp->rc_tid = UAS_VALUE_NTOH(athp->tid); rcbp->rc_bitmap = 
athp->bitmap; rcbp->rc_not_sent_bitmap = athp->bitmap; rcbp->rc_xo = athp->xo; @@ -376,7 +363,6 @@ gbuf_t *m; rcbp->rc_state = RCB_PENDING; ATP_Q_APPEND(atp->atp_attached, rcbp, rc_list); if (m_asp != NULL) { - ATENABLE(s, atp->atp_lock); atp_req_ind(atp, m_asp); return; } @@ -400,16 +386,13 @@ gbuf_t *m; * the replies */ getmicrouptime(&timenow); - ATDISABLE(s_gen, atpgen_lock); if (rcbp->rc_timestamp) { rcbp->rc_timestamp = timenow.tv_sec; if (rcbp->rc_timestamp == 0) rcbp->rc_timestamp = 1; } - ATENABLE(s_gen, atpgen_lock); rcbp->rc_bitmap = athp->bitmap; rcbp->rc_not_sent_bitmap = athp->bitmap; - ATENABLE(s, atp->atp_lock); gbuf_freem(m); atp_reply(rcbp); return; @@ -421,12 +404,10 @@ gbuf_t *m; * we haven't sent any data yet * ignore the request */ - ATENABLE(s, atp->atp_lock); gbuf_freem(m); return; } } - ATENABLE(s, atp->atp_lock); return; } diff --git a/bsd/netat/atp_write.c b/bsd/netat/atp_write.c index d84ecb6ff..4c510abe1 100644 --- a/bsd/netat/atp_write.c +++ b/bsd/netat/atp_write.c @@ -74,9 +74,6 @@ extern struct atp_rcb_qhead atp_need_rel; extern int atp_inited; extern struct atp_state *atp_used_list; extern asp_scb_t *scb_free_list; -extern atlock_t atpgen_lock; -extern atlock_t atpall_lock; -extern atlock_t atptmo_lock; extern gbuf_t *scb_resource_m; extern gbuf_t *atp_resource_m; @@ -139,7 +136,7 @@ atp_wput(gref, m) register gbuf_t *m; { register ioc_t *iocbp; - int i, xcnt, s; + int i, xcnt; struct atp_state *atp; struct atp_trans *trp; struct atp_rcb *rcbp; @@ -237,18 +234,16 @@ atp_wput(gref, m) atp->atp_msgq = 0; } - ATDISABLE(s, atp->atp_lock); /* * search for the corresponding rcb */ for (rcbp = atp->atp_rcb.head; rcbp; rcbp = rcbp->rc_list.next) { - if (rcbp->rc_tid == UAS_VALUE(athp->tid) && + if (rcbp->rc_tid == UAS_VALUE_NTOH(athp->tid) && rcbp->rc_socket.node == ddp->dst_node && rcbp->rc_socket.net == NET_VALUE(ddp->dst_net) && rcbp->rc_socket.socket == ddp->dst_socket) break; } - ATENABLE(s, atp->atp_lock); /* * If it has already been 
sent then return an error @@ -269,13 +264,11 @@ atp_wput(gref, m) rcbp->rc_socket.socket = ddp->dst_socket; rcbp->rc_socket.node = ddp->dst_node; rcbp->rc_socket.net = NET_VALUE(ddp->dst_net); - rcbp->rc_tid = UAS_VALUE(athp->tid); + rcbp->rc_tid = UAS_VALUE_NTOH(athp->tid); rcbp->rc_bitmap = 0xff; rcbp->rc_xo = 0; - ATDISABLE(s, atp->atp_lock); rcbp->rc_state = RCB_SENDING; ATP_Q_APPEND(atp->atp_rcb, rcbp, rc_list); - ATENABLE(s, atp->atp_lock); } xcnt = get_bds_entries(m2); if ((i = atp_unpack_bdsp(atp, m2, rcbp, xcnt, FALSE))) { @@ -303,7 +296,6 @@ atp_wput(gref, m) /* * search for a waiting request */ - ATDISABLE(s, atp->atp_lock); if ((rcbp = atp->atp_attached.head)) { /* * Got one, move it to the active response Q @@ -320,13 +312,11 @@ atp_wput(gref, m) */ atp_rcb_free(rcbp); } - ATENABLE(s, atp->atp_lock); atp_iocack(atp, m); } else { /* * None available - can out */ - ATENABLE(s, atp->atp_lock); atp_iocnak(atp, m, EAGAIN); } break; @@ -343,16 +333,13 @@ atp_wput(gref, m) i = *(int *)gbuf_rptr(gbuf_cont(m)); gbuf_freem(gbuf_cont(m)); gbuf_cont(m) = NULL; - ATDISABLE(s, atp->atp_lock); for (trp = atp->atp_trans_wait.head; trp; trp = trp->tr_list.next) { if (trp->tr_tid == i) break; } - if (trp == NULL) { - ATENABLE(s, atp->atp_lock); + if (trp == NULL) atp_iocnak(atp, m, ENOENT); - } else { - ATENABLE(s, atp->atp_lock); + else { atp_free(trp); atp_iocack(atp, m); } @@ -430,7 +417,7 @@ register struct atp_trans *trp; gbuf_wset(m,TOTAL_ATP_HDR_SIZE); ddp = AT_DDP_HDR(m); ddp->type = DDP_ATP; - UAS_ASSIGN(ddp->checksum, 0); + UAS_ASSIGN_HTON(ddp->checksum, 0); ddp->dst_socket = trp->tr_socket.socket; ddp->dst_node = trp->tr_socket.node; NET_ASSIGN(ddp->dst_net, trp->tr_socket.net); @@ -443,7 +430,7 @@ register struct atp_trans *trp; athp = AT_ATP_HDR(m); ATP_CLEAR_CONTROL(athp); athp->cmd = ATP_CMD_TREL; - UAS_ASSIGN(athp->tid, trp->tr_tid); + UAS_ASSIGN_HTON(athp->tid, trp->tr_tid); } return (m); @@ -454,7 +441,7 @@ void atp_send_replies(atp, rcbp) register 
struct atp_rcb *rcbp; { register gbuf_t *m; register int i, len; - int s_gen, s, cnt, err, offset, space; + int s_gen, cnt, err, offset, space; unsigned char *m0_rptr = NULL, *m0_wptr = NULL; register at_atp_t *athp; register struct atpBDS *bdsp; @@ -468,11 +455,8 @@ void atp_send_replies(atp, rcbp) }; struct timeval timenow; - ATDISABLE(s, atp->atp_lock); - if (rcbp->rc_queue != atp) { - ATENABLE(s, atp->atp_lock); + if (rcbp->rc_queue != atp) return; - } if (rcbp->rc_not_sent_bitmap == 0) goto nothing_to_send; @@ -560,21 +544,17 @@ void atp_send_replies(atp, rcbp) */ bdsp++; } - if (mlist) { - ATENABLE(s, atp->atp_lock); + if (mlist) DDP_OUTPUT(mlist); - ATDISABLE(s, atp->atp_lock); - } + nothing_to_send: /* * If all replies from this reply block have been sent then * remove it from the queue and mark it so */ - if (rcbp->rc_queue != atp) { - ATENABLE(s, atp->atp_lock); + if (rcbp->rc_queue != atp) return; - } rcbp->rc_rep_waiting = 0; /* @@ -589,7 +569,6 @@ void atp_send_replies(atp, rcbp) */ if (rcbp->rc_xo && rcbp->rc_state != RCB_RELEASED) { getmicrouptime(&timenow); - ATDISABLE(s_gen, atpgen_lock); if (rcbp->rc_timestamp == 0) { rcbp->rc_timestamp = timenow.tv_sec; if (rcbp->rc_timestamp == 0) @@ -597,10 +576,8 @@ void atp_send_replies(atp, rcbp) ATP_Q_APPEND(atp_need_rel, rcbp, rc_tlist); } rcbp->rc_state = RCB_RESPONSE_FULL; - ATENABLE(s_gen, atpgen_lock); } else atp_rcb_free(rcbp); - ATENABLE(s, atp->atp_lock); } /* atp_send_replies */ @@ -688,7 +665,7 @@ atp_unpack_bdsp(atp, m, rcbp, cnt, wait) register gbuf_t *m2, *m1, *m0, *mhdr; caddr_t lastPage; at_atp_t *athp; - int i, len, s_gen; + int i, len; at_socket src_socket; struct ddp_atp { @@ -834,13 +811,11 @@ atp_unpack_bdsp(atp, m, rcbp, cnt, wait) l_send: if (rcbp->rc_xo) { getmicrouptime(&timenow); - ATDISABLE(s_gen, atpgen_lock); if (rcbp->rc_timestamp == 0) { if ((rcbp->rc_timestamp = timenow.tv_sec) == 0) rcbp->rc_timestamp = 1; ATP_Q_APPEND(atp_need_rel, rcbp, rc_tlist); } - ATENABLE(s_gen, 
atpgen_lock); } DDP_OUTPUT(mlist); @@ -862,7 +837,6 @@ int atp_bind(gref, sVal, flag) unsigned char inpC, sNextUsed = 0; unsigned int sMin, sMax, sSav; struct atp_state *atp; - int s; atp = (struct atp_state *)gref->info; if (atp->dflag) @@ -870,7 +844,6 @@ int atp_bind(gref, sVal, flag) sMax = ATP_SOCKET_LAST; sMin = ATP_SOCKET_FIRST; - ATDISABLE(s, atpgen_lock); if (flag && (*flag == 3)) { sMin += 40; if (sMin < sNext) { @@ -883,7 +856,6 @@ int atp_bind(gref, sVal, flag) ((sVal > sMax) || (sVal < 2) || (sVal == 6) || (ddp_socket_inuse(sVal, DDP_ATP) && (atp_inputQ[sVal] != (gref_t *)1)))) { - ATENABLE(s, atpgen_lock); return 0; } @@ -913,7 +885,6 @@ int atp_bind(gref, sVal, flag) sNext = 0; *flag = (unsigned char)sSav; } - ATENABLE(s, atpgen_lock); return 0; } } @@ -927,7 +898,6 @@ int atp_bind(gref, sVal, flag) sNext = 0; } - ATENABLE(s, atpgen_lock); return (int)sVal; } @@ -936,19 +906,16 @@ void atp_req_ind(atp, mioc) register gbuf_t *mioc; { register struct atp_rcb *rcbp; - int s; if ((rcbp = atp->atp_attached.head) != 0) { gbuf_cont(mioc) = rcbp->rc_ioctl; rcbp->rc_ioctl = NULL; - ATDISABLE(s, atp->atp_lock); if (rcbp->rc_xo) { ATP_Q_REMOVE(atp->atp_attached, rcbp, rc_list); rcbp->rc_state = RCB_NOTIFIED; ATP_Q_APPEND(atp->atp_rcb, rcbp, rc_list); } else atp_rcb_free(rcbp); - ATENABLE(s, atp->atp_lock); if (gbuf_cont(mioc)) ((ioc_t *)gbuf_rptr(mioc))->ioc_count = gbuf_msgsize(gbuf_cont(mioc)); else @@ -1001,7 +968,6 @@ void atp_cancel_req(gref, tid) gref_t *gref; unsigned short tid; { - int s; struct atp_state *atp; struct atp_trans *trp; @@ -1009,12 +975,10 @@ void atp_cancel_req(gref, tid) if (atp->dflag) atp = (struct atp_state *)atp->atp_msgq; - ATDISABLE(s, atp->atp_lock); for (trp = atp->atp_trans_wait.head; trp; trp = trp->tr_list.next) { if (trp->tr_tid == tid) break; } - ATENABLE(s, atp->atp_lock); if (trp != NULL) atp_free(trp); } @@ -1026,9 +990,7 @@ void atp_dequeue_atp(atp) struct atp_state *atp; { - int s; - ATDISABLE(s, atpall_lock); if (atp 
== atp_used_list) { if ((atp_used_list = atp->atp_trans_waiting) != 0) atp->atp_trans_waiting->atp_rcb_waiting = 0; @@ -1040,7 +1002,6 @@ atp_dequeue_atp(atp) atp->atp_trans_waiting = 0; atp->atp_rcb_waiting = 0; - ATENABLE(s, atpall_lock); } void @@ -1049,15 +1010,11 @@ atp_timout(func, trp, ticks) struct atp_trans *trp; int ticks; { - int s; unsigned int sum; struct atp_trans *curr_trp, *prev_trp; - ATDISABLE(s, atptmo_lock); - if (trp->tr_tmo_func) { - ATENABLE(s, atptmo_lock); + if (trp->tr_tmo_func) return; - } trp->tr_tmo_func = func; trp->tr_tmo_delta = 1+(ticks>>5); @@ -1065,7 +1022,6 @@ atp_timout(func, trp, ticks) if (trp_tmo_list == 0) { trp->tr_tmo_next = trp->tr_tmo_prev = 0; trp_tmo_list = trp; - ATENABLE(s, atptmo_lock); return; } @@ -1099,7 +1055,6 @@ atp_timout(func, trp, ticks) trp_tmo_list->tr_tmo_prev = trp; trp_tmo_list = trp; } - ATENABLE(s, atptmo_lock); } void @@ -1107,13 +1062,9 @@ atp_untimout(func, trp) void (*func)(); struct atp_trans *trp; { - int s; - ATDISABLE(s, atptmo_lock); - if (trp->tr_tmo_func == 0) { - ATENABLE(s, atptmo_lock); + if (trp->tr_tmo_func == 0) return; - } if (trp_tmo_list == trp) { if ((trp_tmo_list = trp->tr_tmo_next) != 0) { @@ -1127,7 +1078,6 @@ atp_untimout(func, trp) } } trp->tr_tmo_func = 0; - ATENABLE(s, atptmo_lock); } void @@ -1143,11 +1093,9 @@ void atp_trp_clock(arg) void *arg; { - int s; struct atp_trans *trp; void (*tr_tmo_func)(); - ATDISABLE(s, atptmo_lock); if (trp_tmo_list) trp_tmo_list->tr_tmo_delta--; while (((trp = trp_tmo_list) != 0) && (trp_tmo_list->tr_tmo_delta == 0)) { @@ -1155,12 +1103,9 @@ atp_trp_clock(arg) trp_tmo_list->tr_tmo_prev = 0; if ((tr_tmo_func = trp->tr_tmo_func) != 0) { trp->tr_tmo_func = 0; - ATENABLE(s, atptmo_lock); (*tr_tmo_func)(trp); - ATDISABLE(s, atptmo_lock); } } - ATENABLE(s, atptmo_lock); timeout(atp_trp_clock_locked, (void *)arg, (1<<5)); } @@ -1177,8 +1122,9 @@ atp_send_req(gref, mioc) register at_ddp_t *ddp; gbuf_t *m, *m2, *bds; struct atp_set_default *sdb; - 
int s, old; + int old; unsigned int timer; + u_short temp_net; atp = (struct atp_state *)((struct atp_state *)gref->info)->atp_msgq; iocbp = (ioc_t *)gbuf_rptr(mioc); @@ -1226,7 +1172,7 @@ atp_send_req(gref, mioc) */ athp = AT_ATP_HDR(m2); athp->cmd = ATP_CMD_TREQ; - UAS_ASSIGN(athp->tid, trp->tr_tid); + UAS_ASSIGN_HTON(athp->tid, trp->tr_tid); athp->eom = 0; athp->sts = 0; trp->tr_xo = athp->xo; @@ -1239,7 +1185,8 @@ atp_send_req(gref, mioc) trp->tr_socket.net = NET_VALUE(ddp->dst_net); trp->tr_local_socket = atp->atp_socket_no; trp->tr_local_node = ddp->src_node; - NET_NET(trp->tr_local_net, ddp->src_net); + temp_net = NET_VALUE(ddp->src_net); + NET_ASSIGN_NOSWAP(trp->tr_local_net, temp_net); #ifdef NOT_YET /* save the local information in the gref */ @@ -1252,9 +1199,7 @@ atp_send_req(gref, mioc) /* * Put us in the transaction waiting queue */ - ATDISABLE(s, atp->atp_lock); ATP_Q_APPEND(atp->atp_trans_wait, trp, tr_list); - ATENABLE(s, atp->atp_lock); /* * Send the message and set the timer @@ -1296,6 +1241,7 @@ void atp_send_rsp(gref, m, wait) register at_atp_t *athp; register at_ddp_t *ddp; int s, xcnt; + u_short temp_net; atp = (struct atp_state *)gref->info; if (atp->dflag) @@ -1306,9 +1252,8 @@ void atp_send_rsp(gref, m, wait) /* * search for the corresponding rcb */ - ATDISABLE(s, atp->atp_lock); for (rcbp = atp->atp_rcb.head; rcbp; rcbp = rcbp->rc_list.next) { - if ( (rcbp->rc_tid == UAS_VALUE(athp->tid)) && + if ( (rcbp->rc_tid == UAS_VALUE_NTOH(athp->tid)) && (rcbp->rc_socket.node == ddp->dst_node) && (rcbp->rc_socket.net == NET_VALUE(ddp->dst_net)) && (rcbp->rc_socket.socket == ddp->dst_socket) ) @@ -1320,11 +1265,9 @@ void atp_send_rsp(gref, m, wait) */ if ((rcbp && (rcbp->rc_state != RCB_NOTIFIED)) || (rcbp == NULL && athp->xo) ) { - ATENABLE(s, atp->atp_lock); gbuf_freem(m); return; } - ATENABLE(s, atp->atp_lock); if (rcbp == NULL) { /* a response is being sent for an ALO transaction */ if ((rcbp = atp_rcb_alloc(atp)) == NULL) { @@ -1335,16 +1278,15 
@@ void atp_send_rsp(gref, m, wait) rcbp->rc_socket.socket = ddp->dst_socket; rcbp->rc_socket.node = ddp->dst_node; rcbp->rc_socket.net = NET_VALUE(ddp->dst_net); - rcbp->rc_tid = UAS_VALUE(athp->tid); + rcbp->rc_tid = UAS_VALUE_NTOH(athp->tid); rcbp->rc_bitmap = 0xff; rcbp->rc_xo = 0; rcbp->rc_state = RCB_RESPONSE_FULL; - ATDISABLE(s, atp->atp_lock); ATP_Q_APPEND(atp->atp_rcb, rcbp, rc_list); - ATENABLE(s, atp->atp_lock); } else if (ddp->src_node == 0) { - NET_NET(ddp->src_net, rcbp->rc_local_net); + temp_net = NET_VALUE_NOSWAP(rcbp->rc_local_net); + NET_ASSIGN(ddp->src_net, temp_net); ddp->src_node = rcbp->rc_local_node; } @@ -1442,7 +1384,7 @@ _ATPsndreq(fd, buf, len, nowait, err, proc) void *proc; { gref_t *gref; - int s, rc; + int rc; unsigned short tid; unsigned int timer; register struct atp_state *atp; @@ -1473,9 +1415,7 @@ _ATPsndreq(fd, buf, len, nowait, err, proc) ts.tv_sec = 0; ts.tv_nsec = 100 *1000 * NSEC_PER_USEC; - ATDISABLE(s, atp->atp_delay_lock); rc = msleep(&atp->atp_delay_event, atalk_mutex, PSOCK | PCATCH, "atpmioc", &ts); - ATENABLE(s, atp->atp_delay_lock); if (rc != 0) { *err = rc; file_drop(fd); @@ -1491,9 +1431,7 @@ _ATPsndreq(fd, buf, len, nowait, err, proc) ts.tv_sec = 0; ts.tv_nsec = 100 *1000 * NSEC_PER_USEC; - ATDISABLE(s, atp->atp_delay_lock); rc = msleep(&atp->atp_delay_event, atalk_mutex, PSOCK | PCATCH, "atpm2", &ts); - ATENABLE(s, atp->atp_delay_lock); if (rc != 0) { gbuf_freeb(mioc); file_drop(fd); @@ -1534,9 +1472,7 @@ _ATPsndreq(fd, buf, len, nowait, err, proc) ts.tv_sec = 0; ts.tv_nsec = 100 *1000 * NSEC_PER_USEC; - ATDISABLE(s, atp->atp_delay_lock); rc = msleep(&atp->atp_delay_event, atalk_mutex, PSOCK | PCATCH, "atptrp", &ts); - ATENABLE(s, atp->atp_delay_lock); if (rc != 0) { gbuf_freem(mioc); file_drop(fd); @@ -1562,7 +1498,7 @@ _ATPsndreq(fd, buf, len, nowait, err, proc) */ athp = AT_ATP_HDR(m2); athp->cmd = ATP_CMD_TREQ; - UAS_ASSIGN(athp->tid, trp->tr_tid); + UAS_ASSIGN_HTON(athp->tid, trp->tr_tid); athp->eom = 0; 
athp->sts = 0; trp->tr_xo = athp->xo; @@ -1587,9 +1523,7 @@ _ATPsndreq(fd, buf, len, nowait, err, proc) /* * Put us in the transaction waiting queue */ - ATDISABLE(s, atp->atp_lock); ATP_Q_APPEND(atp->atp_trans_wait, trp, tr_list); - ATENABLE(s, atp->atp_lock); /* * Send the message and set the timer @@ -1610,21 +1544,18 @@ _ATPsndreq(fd, buf, len, nowait, err, proc) /* * wait for the transaction to complete */ - ATDISABLE(s, trp->tr_lock); while ((trp->tr_state != TRANS_DONE) && (trp->tr_state != TRANS_FAILED) && (trp->tr_state != TRANS_ABORTING)) { trp->tr_rsp_wait = 1; rc = msleep(&trp->tr_event, atalk_mutex, PSOCK | PCATCH, "atpsndreq", 0); if (rc != 0) { trp->tr_rsp_wait = 0; - ATENABLE(s, trp->tr_lock); file_drop(fd); *err = rc; return -1; } } trp->tr_rsp_wait = 0; - ATENABLE(s, trp->tr_lock); if (trp->tr_state == TRANS_FAILED || trp->tr_state == TRANS_ABORTING) { @@ -1806,7 +1737,7 @@ _ATPgetreq(fd, buf, buflen, err, proc) register struct atp_state *atp; register struct atp_rcb *rcbp; register gbuf_t *m, *m_head; - int s, size, len; + int size, len; if ((*err = atalk_getref(0, fd, &gref, proc, 1)) != 0) return -1; @@ -1820,7 +1751,6 @@ _ATPgetreq(fd, buf, buflen, err, proc) return -1; } - ATDISABLE(s, atp->atp_lock); if ((rcbp = atp->atp_attached.head) != NULL) { /* * Got one, move it to the active response Q @@ -1838,7 +1768,6 @@ _ATPgetreq(fd, buf, buflen, err, proc) */ atp_rcb_free(rcbp); } - ATENABLE(s, atp->atp_lock); /* * copyout the request data, including the protocol header @@ -1856,7 +1785,6 @@ _ATPgetreq(fd, buf, buflen, err, proc) file_drop(fd); return size; } - ATENABLE(s, atp->atp_lock); file_drop(fd); return -1; @@ -1872,7 +1800,7 @@ _ATPgetrsp(fd, bdsp, err, proc) gref_t *gref; register struct atp_state *atp; register struct atp_trans *trp; - int s, tid; + int tid; char bds[atpBDSsize]; if ((*err = atalk_getref(0, fd, &gref, proc, 1)) != 0) @@ -1887,7 +1815,6 @@ _ATPgetrsp(fd, bdsp, err, proc) return -1; } - ATDISABLE(s, atp->atp_lock); for 
(trp = atp->atp_trans_wait.head; trp; trp = trp->tr_list.next) { dPrintf(D_M_ATP, D_L_INFO, ("ATPgetrsp: atp:0x%x, trp:0x%x, state:%d\n", @@ -1895,7 +1822,6 @@ _ATPgetrsp(fd, bdsp, err, proc) switch (trp->tr_state) { case TRANS_DONE: - ATENABLE(s, atp->atp_lock); if ((*err = copyin(CAST_USER_ADDR_T(bdsp), (caddr_t)bds, sizeof(bds))) != 0) { atp_free(trp); @@ -1920,7 +1846,6 @@ _ATPgetrsp(fd, bdsp, err, proc) /* * transaction timed out, return error */ - ATENABLE(s, atp->atp_lock); atp_free(trp); file_drop(fd); *err = ETIMEDOUT; @@ -1930,7 +1855,6 @@ _ATPgetrsp(fd, bdsp, err, proc) continue; } } - ATENABLE(s, atp->atp_lock); file_drop(fd); *err = EINVAL; @@ -1942,7 +1866,6 @@ atp_drop_req(gref, m) gref_t *gref; gbuf_t *m; { - int s; struct atp_state *atp; struct atp_rcb *rcbp; at_atp_t *athp; @@ -1957,9 +1880,8 @@ atp_drop_req(gref, m) /* * search for the corresponding rcb */ - ATDISABLE(s, atp->atp_lock); for (rcbp = atp->atp_rcb.head; rcbp; rcbp = rcbp->rc_list.next) { - if ( (rcbp->rc_tid == UAS_VALUE(athp->tid)) && + if ( (rcbp->rc_tid == UAS_VALUE_NTOH(athp->tid)) && (rcbp->rc_socket.node == ddp->src_node) && (rcbp->rc_socket.net == NET_VALUE(ddp->src_net)) && (rcbp->rc_socket.socket == ddp->src_socket) ) @@ -1971,7 +1893,6 @@ atp_drop_req(gref, m) */ if (rcbp) atp_rcb_free(rcbp); - ATENABLE(s, atp->atp_lock); gbuf_freem(m); } diff --git a/bsd/netat/aurp.h b/bsd/netat/aurp.h index c98f2321f..8301f4078 100644 --- a/bsd/netat/aurp.h +++ b/bsd/netat/aurp.h @@ -27,6 +27,8 @@ * File: aurp.h */ +#ifdef AURP_SUPPORT + #ifndef _NETAT_AURP_H_ #define _NETAT_AURP_H_ #include @@ -166,9 +168,6 @@ typedef struct { unsigned short flags; } aurp_hdr_t; -#ifdef AURP_SUPPORT - -extern atlock_t aurpgen_lock; extern gref_t *aurp_gref; extern unsigned char dst_addr_cnt; extern unsigned char net_access_cnt; @@ -178,7 +177,6 @@ extern int net_port; extern int update_tmo; extern aurp_state_t aurp_state[]; extern unsigned short net_access[]; -#endif struct myq { struct mbuf *q_head; @@ 
-186,7 +184,6 @@ struct myq int q_cnt; }; -#define LOCK_DECL(x) atlock_t x #include @@ -289,3 +286,5 @@ extern struct aurp_global_t aurp_global; #endif /* KERNEL_PRIVATE */ #endif /* __APPLE_API_OBSOLETE */ #endif /* _NETAT_AURP_H_ */ + +#endif /* AURP_SUPPORT */ diff --git a/bsd/netat/aurp_aurpd.c b/bsd/netat/aurp_aurpd.c index bcd8365ac..cf9695434 100644 --- a/bsd/netat/aurp_aurpd.c +++ b/bsd/netat/aurp_aurpd.c @@ -32,6 +32,7 @@ * Kernel process to implement the AURP daemon: * manage tunnels to remote AURP servers across IP networks */ +#ifdef AURP_SUPPORT #include #include @@ -103,7 +104,6 @@ aurpd_start() */ bzero((char *)&aurp_global.tunnel, sizeof(aurp_global.tunnel)); /*lock_alloc(&aurp_global.glock, LOCK_ALLOC_PIN, AURP_EVNT_LOCK, -1);*/ - ATLOCKINIT(aurp_global.glock); ATEVENTINIT(aurp_global.event_anchor); /* open udp socket */ @@ -203,7 +203,6 @@ AURPgetmsg(err) * when a packet arrives */ - ATDISABLE(s, aurp_global.glock); events = aurp_global.event; if (((*err == 0) || (*err == EWOULDBLOCK)) && events == 0) { @@ -212,7 +211,6 @@ AURPgetmsg(err) events = aurp_global.event; aurp_global.event = 0; } - ATENABLE(s, aurp_global.glock); /* * Shut down if we have the AE_SHUTDOWN event or if we got @@ -282,9 +280,7 @@ AURPgetmsg(err) * which will wake us from the sleep at * the top of the outer loop. 
*/ - ATDISABLE(s, aurp_global.glock); aurp_global.event &= ~AE_UDPIP; - ATENABLE(s, aurp_global.glock); dPrintf(D_M_AURP, D_L_WARNING, ("AURPgetmsg: spurious soreceive, err==%d, p_mbuf==0x%x\n", *err, (unsigned int) p_mbuf)); break; } @@ -304,9 +300,7 @@ void aurp_wakeup(__unused struct socket *so, register caddr_t p, __unused int st register int bit; bit = (int) p; - ATDISABLE(s, aurp_global.glock); aurp_global.event |= bit; - ATENABLE(s, aurp_global.glock); dPrintf(D_M_AURP, D_L_STATE_CHG, ("aurp_wakeup: bit 0x%x, aurp_global.event now 0x%x\n", @@ -422,14 +416,10 @@ atalk_to_ip(register gbuf_t *m) domain = (aurp_domain_t *)gbuf_rptr(m); *(long *) &rem_addr.sin_addr = domain->dst_address; - ATDISABLE(s, aurp_global.glock); aurp_global.running++; - ATENABLE(s, aurp_global.glock); if (aurp_global.shutdown) { gbuf_freem(m); - ATDISABLE(s, aurp_global.glock); aurp_global.running--; - ATENABLE(s, aurp_global.glock); dPrintf(D_M_AURP, D_L_SHUTDN_INFO, ("atalk_to_ip: detected aurp_global.shutdown state\n")); return; @@ -442,9 +432,8 @@ atalk_to_ip(register gbuf_t *m) error)); } - ATDISABLE(s, aurp_global.glock); aurp_global.running--; - ATENABLE(s, aurp_global.glock); return; } +#endif /* AURP_SUPPORT */ diff --git a/bsd/netat/aurp_cfg.c b/bsd/netat/aurp_cfg.c index 97f8c6d33..b20e62f97 100644 --- a/bsd/netat/aurp_cfg.c +++ b/bsd/netat/aurp_cfg.c @@ -27,6 +27,9 @@ * * File: cfg.c */ + +#ifdef AURP_SUPPORT + #define RESOLVE_DBG #include #include @@ -49,7 +52,6 @@ #include #include -extern atlock_t aurpgen_lock; static int aurp_inited = 0; static char aurp_minor_no[4]; @@ -59,10 +61,8 @@ int aurp_open(gref) extern void AURPcmdx(); int i; - if (!aurp_inited) { + if (!aurp_inited) aurp_inited = 1; - ATLOCKINIT(aurpgen_lock); - } for (i=1; i < sizeof(aurp_minor_no); i++) { if (aurp_minor_no[i] == 0) { @@ -98,3 +98,5 @@ int aurp_close(gref) gref->info = 0; return 0; } + +#endif /* AURP_SUPPORT */ diff --git a/bsd/netat/aurp_gdata.c b/bsd/netat/aurp_gdata.c index 
fa15fbcf4..4cd8000ac 100644 --- a/bsd/netat/aurp_gdata.c +++ b/bsd/netat/aurp_gdata.c @@ -27,6 +27,9 @@ * * File: gdata.c */ + +#ifdef AURP_SUPPORT + #include #include #include @@ -49,7 +52,6 @@ #include #include -atlock_t aurpgen_lock; gref_t *aurp_gref; unsigned char dst_addr_cnt; unsigned char net_access_cnt; @@ -59,3 +61,5 @@ int net_port; int update_tmo; aurp_state_t aurp_state[256]; unsigned short net_access[AURP_MaxNetAccess]; + +#endif /* AURP_SUPPORT */ diff --git a/bsd/netat/aurp_misc.c b/bsd/netat/aurp_misc.c index f1cd9a728..c244f4da8 100644 --- a/bsd/netat/aurp_misc.c +++ b/bsd/netat/aurp_misc.c @@ -27,6 +27,9 @@ * * File: misc.c */ + +#ifdef AURP_SUPPORT + #include #include #include @@ -208,3 +211,5 @@ void AURPaccess() entry->AURPFlag = net_export ? 0 : AURP_NetHiden; } } + +#endif /* AURP_SUPPORT */ diff --git a/bsd/netat/aurp_open.c b/bsd/netat/aurp_open.c index 2157d68c7..dffb1d636 100644 --- a/bsd/netat/aurp_open.c +++ b/bsd/netat/aurp_open.c @@ -27,6 +27,9 @@ * * File: open.c */ + +#ifdef AURP_SUPPORT + #include #include #include @@ -246,3 +249,5 @@ void AURPrcvOpenRsp(state, m) /* get routing info */ AURPsndRIReq(state); } + +#endif /* AURP_SUPPORT */ diff --git a/bsd/netat/aurp_rd.c b/bsd/netat/aurp_rd.c index 1140a2bf3..e71e7be05 100644 --- a/bsd/netat/aurp_rd.c +++ b/bsd/netat/aurp_rd.c @@ -27,6 +27,9 @@ * * File: rd.c */ + +#ifdef AURP_SUPPORT + #include #include #include @@ -115,3 +118,5 @@ void AURPrcvRDReq(state, m) /* respond to the going-down peer with an RI Ack packet */ AURPsndRIAck(state, m, 0); } + +#endif /* AURP_SUPPORT */ diff --git a/bsd/netat/aurp_ri.c b/bsd/netat/aurp_ri.c index 44d8df254..0fa8cbd20 100644 --- a/bsd/netat/aurp_ri.c +++ b/bsd/netat/aurp_ri.c @@ -27,6 +27,9 @@ * * File: ri.c */ + +#ifdef AURP_SUPPORT + #include #include #include @@ -152,14 +155,12 @@ void AURPsndRIRsp(state) gbuf_t *m; aurp_hdr_t *hdrp; short len = 0; - int s, msize = 0; + int msize = 0; - ATDISABLE(s, aurpgen_lock); /* make sure we're in a 
valid state to send RI response */ if ((state->snd_state == AURPSTATE_Unconnected) || (state->snd_state == AURPSTATE_WaitingForRIAck2)) { - ATENABLE(s, aurpgen_lock); return; } @@ -167,7 +168,6 @@ void AURPsndRIRsp(state) state->snd_state = AURPSTATE_WaitingForRIAck1; if (state->rsp_m == 0) { - ATENABLE(s, aurpgen_lock); msize = sizeof(aurp_hdr_t); if ((m = (gbuf_t *)gbuf_alloc(msize+AURP_MaxPktSize, PRI_MED)) == 0) { timeout(AURPsndRIRsp_locked, state, AURP_RetryInterval*HZ); @@ -201,8 +201,6 @@ void AURPsndRIRsp(state) timeout(AURPsndRIRsp_locked, state, AURP_RetryInterval*HZ); state->snd_tmo = 1; - if (msize == 0) - ATENABLE(s, aurpgen_lock); /* send the packet */ if (m) { @@ -229,13 +227,11 @@ void AURPsndRIUpd(state) short len = 0; int s, msize = 0; - ATDISABLE(s, aurpgen_lock); /* make sure we're in a valid state to send update */ if (state->snd_next_entry || (state->upd_m == 0) || (state->snd_state == AURPSTATE_Unconnected) || (state->snd_state == AURPSTATE_WaitingForRIAck1)) { - ATENABLE(s, aurpgen_lock); return; } @@ -243,7 +239,6 @@ void AURPsndRIUpd(state) state->snd_state = AURPSTATE_WaitingForRIAck2; if (state->snd_tmo == 0) { - ATENABLE(s, aurpgen_lock); msize = sizeof(aurp_hdr_t); m = state->upd_m; len = gbuf_len(m); @@ -264,8 +259,6 @@ void AURPsndRIUpd(state) timeout(AURPsndRIUpd_locked, state, AURP_RetryInterval*HZ); state->snd_tmo = 1; - if (msize == 0) - ATENABLE(s, aurpgen_lock); /* send the packet */ if (m) { @@ -283,12 +276,10 @@ void AURPrcvRIReq(state, m) aurp_hdr_t *hdrp = (aurp_hdr_t *)gbuf_rptr(m); int s; - ATDISABLE(s, aurpgen_lock); /* make sure we're in a valid state to accept it */ if ((state->snd_state == AURPSTATE_Unconnected) || (state->snd_state == AURPSTATE_WaitingForRIAck2)) { - ATENABLE(s, aurpgen_lock); dPrintf(D_M_AURP, D_L_WARNING, ("AURPrcvRIReq: unexpected request\n")); gbuf_freem(m); return; @@ -296,7 +287,6 @@ void AURPrcvRIReq(state, m) /* check for the correct connection id */ if (hdrp->connection_id != 
state->snd_connection_id) { - ATENABLE(s, aurpgen_lock); dPrintf(D_M_AURP, D_L_WARNING, ("AURPrcvRIReq: invalid connection id, r=%d, m=%d\n", hdrp->connection_id, state->snd_connection_id)); @@ -310,10 +300,8 @@ void AURPrcvRIReq(state, m) gbuf_freem(state->rsp_m); state->rsp_m = 0; } - ATENABLE(s, aurpgen_lock); AURPsndRIRsp(state); - } else - ATENABLE(s, aurpgen_lock); + } gbuf_freem(m); } @@ -324,13 +312,10 @@ void AURPrcvRIRsp(state, m) gbuf_t *m; { aurp_hdr_t *hdrp = (aurp_hdr_t *)gbuf_rptr(m); - int s; - ATDISABLE(s, aurpgen_lock); /* make sure we're in a valid state to accept it */ if (state->rcv_state != AURPSTATE_WaitingForRIRsp) { - ATENABLE(s, aurpgen_lock); dPrintf(D_M_AURP, D_L_WARNING, ("AURPrcvRIRsp: unexpected response\n")); gbuf_freem(m); return; @@ -338,7 +323,6 @@ void AURPrcvRIRsp(state, m) /* check for the correct connection id */ if (hdrp->connection_id != state->rcv_connection_id) { - ATENABLE(s, aurpgen_lock); dPrintf(D_M_AURP, D_L_WARNING, ("AURPrcvRIRsp: invalid connection id, r=%d, m=%d\n", hdrp->connection_id, state->rcv_connection_id)); @@ -348,7 +332,6 @@ void AURPrcvRIRsp(state, m) /* check for the correct sequence number */ if (hdrp->sequence_number != state->rcv_sequence_number) { - ATENABLE(s, aurpgen_lock); if ( ((state->rcv_sequence_number == AURP_FirstSeqNum) && (hdrp->sequence_number == AURP_LastSeqNum)) || (hdrp->sequence_number == (state->rcv_sequence_number-1)) ) { @@ -364,7 +347,6 @@ void AURPrcvRIRsp(state, m) gbuf_rinc(m,sizeof(*hdrp)); if (hdrp->flags & AURPFLG_LAST) state->rcv_state = AURPSTATE_Connected; - ATENABLE(s, aurpgen_lock); dPrintf(D_M_AURP, D_L_INFO, ("AURPrcvRIRsp: len=%ld\n", gbuf_len(m))); @@ -457,12 +439,10 @@ void AURPrcvRIAck(state, m) gbuf_t *dat_m; aurp_hdr_t *hdrp = (aurp_hdr_t *)gbuf_rptr(m); unsigned char snd_state; - int s; int flag; dPrintf(D_M_AURP, D_L_INFO, ("AURPrcvRIAck: state=%d\n", state->snd_state)); - ATDISABLE(s, aurpgen_lock); /* make sure we're in a valid state to accept it */ 
snd_state = state->snd_state; @@ -492,7 +472,6 @@ void AURPrcvRIAck(state, m) /* update state info */ state->snd_state = AURPSTATE_Connected; - ATENABLE(s, aurpgen_lock); if (state->snd_next_entry) /* more RI responses to send? */ AURPsndRIRsp(state); @@ -502,8 +481,7 @@ void AURPrcvRIAck(state, m) AURPsndZRsp(state, dat_m, flag); else if (dat_m) gbuf_freem(dat_m); - } else - ATENABLE(s, aurpgen_lock); + } gbuf_freem(m); } @@ -831,17 +809,14 @@ void AURPrtupdate(entry, ev) (!(state->snd_sui & AURPFLG_ND))) continue; if ((ev == AURPEV_NetDistChange) && (!(state->snd_sui & AURPFLG_NDC))) continue; - ATDISABLE(s, aurpgen_lock); if ((state->snd_state != AURPSTATE_Unconnected) && (state->snd_state != AURPSTATE_WaitingForRIAck2)) { if ((m = state->upd_m) == 0) { /* * we don't have the RI update buffer yet, allocate one */ - ATENABLE(s, aurpgen_lock); if ((m = (gbuf_t *)gbuf_alloc(msize+AURP_MaxPktSize, PRI_HI)) == 0) continue; - ATDISABLE(s, aurpgen_lock); state->upd_m = m; gbuf_rinc(m,msize); gbuf_wset(m,0); @@ -859,11 +834,11 @@ void AURPrtupdate(entry, ev) * if the RI update buffer is full, send the RI update now */ if (gbuf_len(m) > (AURP_MaxPktSize-6)) { - ATENABLE(s, aurpgen_lock); AURPsndRIUpd(state); continue; } } - ATENABLE(s, aurpgen_lock); } } + +#endif /* AURP_SUPPORT */ diff --git a/bsd/netat/aurp_rx.c b/bsd/netat/aurp_rx.c index 2a3d85b30..dbc90b1e8 100644 --- a/bsd/netat/aurp_rx.c +++ b/bsd/netat/aurp_rx.c @@ -28,6 +28,8 @@ * * File: rx.c */ +#ifdef AURP_SUPPORT + #include #include #include @@ -207,3 +209,5 @@ at_insert(m, type, node) return 0; } + +#endif /* AURP_SUPPORT */ diff --git a/bsd/netat/aurp_tickle.c b/bsd/netat/aurp_tickle.c index 91994a9bc..664f2cc09 100644 --- a/bsd/netat/aurp_tickle.c +++ b/bsd/netat/aurp_tickle.c @@ -27,6 +27,9 @@ * * File: tickle.c */ + +#ifdef AURP_SUPPORT + #include #include #include @@ -156,3 +159,5 @@ void AURPrcvTickleAck(state, m) /* update state info */ state->tickle_retry = 0; } + +#endif /* AURP_SUPPORT */ diff 
--git a/bsd/netat/aurp_tx.c b/bsd/netat/aurp_tx.c index c6c4ce658..6bd686729 100644 --- a/bsd/netat/aurp_tx.c +++ b/bsd/netat/aurp_tx.c @@ -27,6 +27,9 @@ * * File: tx.c */ + +#ifdef AURP_SUPPORT + #include #include #include @@ -139,3 +142,5 @@ void AURPcmdx(code, mdata, param) dPrintf(D_M_AURP, D_L_ERROR, ("AURPcmdx: bad code, %d\n", code)); } } + +#endif /* AURP_SUPPORT */ diff --git a/bsd/netat/aurp_zi.c b/bsd/netat/aurp_zi.c index 33d051d03..64680d328 100644 --- a/bsd/netat/aurp_zi.c +++ b/bsd/netat/aurp_zi.c @@ -27,6 +27,9 @@ * * File: zi.c */ + +#ifdef AURP_SUPPORT + #include #include #include @@ -611,3 +614,5 @@ AURPsetzi(node, m, sub_code, tuples_cnt) buf += zname->len+1; } } + +#endif /* AURP_SUPPORT */ diff --git a/bsd/netat/ddp.c b/bsd/netat/ddp.c index e471d55c1..dcd25cf5d 100644 --- a/bsd/netat/ddp.c +++ b/bsd/netat/ddp.c @@ -88,8 +88,6 @@ void (*ddp_AURPsendx)(); at_ifaddr_t *aurp_ifID = 0; extern pktsIn,pktsOut; int pktsDropped,pktsHome; -atlock_t ddpall_lock; -atlock_t ddpinp_lock; extern int *atp_pidM; extern int *adsp_pidM; @@ -452,7 +450,6 @@ void ddp_notify_nbp(socket, pid, ddptype) if (at_state.flags & AT_ST_STARTED) { /* *** NBP_CLOSE_NOTE processing (from ddp_nbp.c) *** */ - ATDISABLE(nve_lock, NVE_LOCK); for ((nve_entry = TAILQ_FIRST(&name_registry)); nve_entry; nve_entry = nve_next) { nve_next = TAILQ_NEXT(nve_entry, nve_link); if ((at_socket)socket == nve_entry->address.socket && @@ -464,7 +461,6 @@ void ddp_notify_nbp(socket, pid, ddptype) nbp_delete_entry(nve_entry); } } - ATENABLE(nve_lock, NVE_LOCK); } } /* ddp_notify_nbp */ @@ -479,12 +475,12 @@ static void fillin_pkt_chain(m) if (UAS_VALUE(ddp->checksum)) { tmp = ddp_checksum(m, 4); - UAS_ASSIGN(ddp->checksum, tmp); + UAS_ASSIGN_HTON(ddp->checksum, tmp); } for (tmp_m=gbuf_next(tmp_m); tmp_m; tmp_m=gbuf_next(tmp_m)) { tmp_ddp = (at_ddp_t *)gbuf_rptr(tmp_m); - tmp_ddp->length = gbuf_msgsize(tmp_m); + DDPLEN_ASSIGN(tmp_ddp, gbuf_msgsize(tmp_m)); tmp_ddp->hopcount = tmp_ddp->unused = 0; 
NET_NET(tmp_ddp->src_net, ddp->src_net); @@ -492,7 +488,7 @@ static void fillin_pkt_chain(m) tmp_ddp->src_socket = ddp->src_socket; if (UAS_VALUE(tmp_ddp->checksum)) { tmp = ddp_checksum(tmp_m, 4); - UAS_ASSIGN(tmp_ddp->checksum, tmp); + UAS_ASSIGN_HTON(ddp->checksum, tmp); } } } @@ -573,7 +569,7 @@ int ddp_output(mp, src_socket, src_addr_included) at_ddp_stats.xmit_bytes += len; at_ddp_stats.xmit_packets++; - ddp->length = len; + DDPLEN_ASSIGN(ddp, len); ddp->hopcount = ddp->unused = 0; @@ -853,7 +849,7 @@ int ddp_output(mp, src_socket, src_addr_included) * it doesn't know net#, consequently can't do * AMT_LOOKUP. That task left to aarp now. */ - aarp_send_data(m,ifID,&dest_at_addr, loop); + aarp_send_data(m,ifID, &dest_at_addr, loop); break; case ET_ADDR : pat_output(ifID, m, &dest_addr, 0); @@ -936,7 +932,7 @@ void ddp_input(mp, ifID) * {extended ddp, ... }. */ ddp = (at_ddp_t *)gbuf_rptr(mp); - len = ddp->length; + len = DDPLEN_VALUE(ddp); if (msgsize != len) { if ((unsigned) msgsize > len) { @@ -985,10 +981,10 @@ void ddp_input(mp, ifID) * if the checksum is true, then upstream wants us to calc */ if (UAS_VALUE(ddp->checksum) && - (UAS_VALUE(ddp->checksum) != ddp_checksum(mp, 4))) { + (UAS_VALUE_NTOH(ddp->checksum) != ddp_checksum(mp, 4))) { dPrintf(D_M_DDP, D_L_WARNING, ("Checksum error on incoming pkt, calc 0x%x, exp 0x%x", - ddp_checksum(mp, 4), UAS_VALUE(ddp->checksum))); + ddp_checksum(mp, 4), UAS_VALUE_NTOH(ddp->checksum))); snmpStats.dd_checkSum++; at_ddp_stats.rcv_bad_checksum++; gbuf_freem(mp); @@ -1147,6 +1143,7 @@ int ddp_router_output(mp, ifID, addr_type, router_net, router_node, enet_addr) } ddp = (at_ddp_t *)gbuf_rptr(mp); +#ifdef AURP_SUPPORT if (ifID->ifFlags & AT_IFF_AURP) { /* AURP link? 
*/ if (ddp_AURPsendx) { fillin_pkt_chain(mp); @@ -1159,6 +1156,7 @@ int ddp_router_output(mp, ifID, addr_type, router_net, router_node, enet_addr) return EPROTOTYPE; } } +#endif /* keep some of the tests for now ####### */ @@ -1311,15 +1309,14 @@ void rt_delete(NetStop, NetStart) RT_entry *found; int s; - ATDISABLE(s, ddpinp_lock); if ((found = rt_bdelete(NetStop, NetStart)) != 0) { bzero(found, sizeof(RT_entry)); found->right = RT_table_freelist; RT_table_freelist = found; } - ATENABLE(s, ddpinp_lock); } +#ifdef AURP_SUPPORT int ddp_AURPfuncx(code, param, node) int code; void *param; @@ -1391,7 +1388,7 @@ int ddp_AURPfuncx(code, param, node) return 0; } - +#endif /* checks to see if address of packet is for one of our interfaces returns *ifID if it's for us, NULL if not diff --git a/bsd/netat/ddp.h b/bsd/netat/ddp.h index 919d73373..234249649 100644 --- a/bsd/netat/ddp.h +++ b/bsd/netat/ddp.h @@ -62,9 +62,17 @@ /* DDP extended header packet format */ typedef struct { - unsigned unused:2, - hopcount:4, - length:10; /* Datagram length */ +#if BYTE_ORDER == BIG_ENDIAN + unsigned unused:2, + hopcount:4, /* hop count/len high order */ + length_H:2; +#endif +#if BYTE_ORDER == LITTLE_ENDIAN + unsigned length_H:2, + hopcount:4, + unused:2; +#endif + u_char length_L; /* len low order */ ua_short checksum; /* Checksum */ at_net dst_net; /* Destination network number */ at_net src_net; /* Source network number */ @@ -72,12 +80,17 @@ typedef struct { at_node src_node; /* Source node ID */ at_socket dst_socket; /* Destination socket number */ at_socket src_socket; /* Source socket number */ - u_char type; /* Protocol type */ + u_char type; /* Protocol type */ char data[DDP_DATA_SIZE]; } at_ddp_t; -#define DDPLEN_ASSIGN(ddp, len) ddp->length = len -#define DDPLEN_VALUE(ddp) ddp->length + +#define DDPLEN_ASSIGN(ddp, len) \ + ddp->length_H = 0x03 & (len >> 8); \ + ddp->length_L = len & 0xff; + +#define DDPLEN_VALUE(ddp) \ + (((u_short)ddp->length_H) << 8) + ddp->length_L /* DDP 
module statistics and configuration */ diff --git a/bsd/netat/ddp_aarp.c b/bsd/netat/ddp_aarp.c index d3692d60c..08c50531a 100644 --- a/bsd/netat/ddp_aarp.c +++ b/bsd/netat/ddp_aarp.c @@ -93,7 +93,6 @@ StaticProc void aarp_sched_req(void *); StaticProc int aarp_get_rand_node(); StaticProc int aarp_get_next_node(); StaticProc int aarp_get_rand_net(); -atlock_t arpinp_lock; extern void AARPwakeup(aarp_amt_t *); extern int pat_output(at_ifaddr_t *, gbuf_t *, unsigned char *, int); @@ -181,7 +180,7 @@ int aarp_rcv_pkt(pkt, elapp) aarp_pkt_t *pkt; at_ifaddr_t *elapp; { - switch (pkt->aarp_cmd) { + switch (ntohs(pkt->aarp_cmd)) { case AARP_REQ_CMD: return (aarp_req_cmd_in (pkt, elapp)); case AARP_RESP_CMD: @@ -281,7 +280,7 @@ StaticProc int aarp_resp_cmd_in (pkt, elapp) break; case PROBE_DONE : - AMT_LOOK(amt_ptr, pkt->src_at_addr, elapp); + AMT_LOOK(amt_ptr, pkt->src_at_addr, elapp) if (amt_ptr == NULL) return(-1); if (amt_ptr->tmo) { @@ -422,18 +421,18 @@ int aarp_chk_addr(ddp_hdrp, elapp) * will ALWAYS be removed. If the message is dropped, * it's not an "error". * + * Parameter dest_at_addr must have the net # in network byte order ****************************************************************************/ int aarp_send_data(m, elapp, dest_at_addr, loop) register gbuf_t *m; register at_ifaddr_t *elapp; - struct atalk_addr *dest_at_addr; + struct atalk_addr *dest_at_addr; /* net# in network byte order */ int loop; /* if true, loopback broadcasts */ { register aarp_amt_t *amt_ptr; register at_ddp_t *ddp_hdrp; int error; - int s; struct timeval timenow; getmicrouptime(&timenow); @@ -451,22 +450,19 @@ int aarp_send_data(m, elapp, dest_at_addr, loop) ddp_input(m, elapp); return(0); } - ATDISABLE(s, arpinp_lock); AMT_LOOK(amt_ptr, *dest_at_addr, elapp); if (amt_ptr) { - if (amt_ptr->m) { + if (amt_ptr->m) { /* * there's already a packet awaiting transmission, so * drop this one and let the upper layer retransmit * later. 
*/ - ATENABLE(s, arpinp_lock); gbuf_freel(m); return (0); } - ATENABLE(s, arpinp_lock); return (pat_output(elapp, m, (unsigned char *)&amt_ptr->dest_addr, 0)); } @@ -478,7 +474,6 @@ int aarp_send_data(m, elapp, dest_at_addr, loop) gbuf_t *newm = 0; struct etalk_addr *dest_addr; - ATENABLE(s, arpinp_lock); dest_addr = &elapp->cable_multicast_addr; if (loop) newm = (gbuf_t *)gbuf_dupm(m); @@ -501,7 +496,7 @@ int aarp_send_data(m, elapp, dest_at_addr, loop) } return (error); } - NEW_AMT(amt_ptr, *dest_at_addr,elapp); + NEW_AMT(amt_ptr, *dest_at_addr, elapp) if (amt_ptr->m) { /* @@ -509,7 +504,6 @@ int aarp_send_data(m, elapp, dest_at_addr, loop) * drop this one and let the upper layer retransmit * later. */ - ATENABLE(s, arpinp_lock); gbuf_freel(m); return (0); } @@ -521,7 +515,6 @@ int aarp_send_data(m, elapp, dest_at_addr, loop) amt_ptr->m = m; amt_ptr->elapp = elapp; amt_ptr->no_of_retries = 0; - ATENABLE(s, arpinp_lock); if ((error = aarp_send_req(amt_ptr))) { aarp_delete_amt_info(amt_ptr); @@ -557,7 +550,7 @@ StaticProc int aarp_send_resp(elapp, pkt) new_pkt = (aarp_pkt_t *)gbuf_rptr(m); aarp_build_pkt(new_pkt, elapp); - new_pkt->aarp_cmd = AARP_RESP_CMD; + new_pkt->aarp_cmd = htons(AARP_RESP_CMD); new_pkt->dest_addr = pkt->src_addr; new_pkt->dest_at_addr = pkt->src_at_addr; @@ -599,7 +592,7 @@ register aarp_amt_t *amt_ptr; pkt = (aarp_pkt_t *)gbuf_rptr(m); aarp_build_pkt(pkt, amt_ptr->elapp); - pkt->aarp_cmd = AARP_REQ_CMD; + pkt->aarp_cmd = htons(AARP_REQ_CMD); pkt->dest_addr = et_zeroaddr; pkt->dest_at_addr = amt_ptr->dest_at_addr; pkt->dest_at_addr.atalk_unused = 0; @@ -642,7 +635,7 @@ StaticProc int aarp_send_probe() pkt = (aarp_pkt_t *)gbuf_rptr(m); aarp_build_pkt(pkt, probe_cb.elapp); - pkt->aarp_cmd = AARP_PROBE_CMD; + pkt->aarp_cmd = htons(AARP_PROBE_CMD); pkt->dest_addr = et_zeroaddr; ATALK_ASSIGN(pkt->src_at_addr, probe_cb.elapp->initial_addr.s_net, @@ -700,9 +693,7 @@ register aarp_pkt_t *pkt; at_ifaddr_t *elapp; { register aarp_amt_t *amt_ptr; - int 
s; - ATDISABLE(s, arpinp_lock); AMT_LOOK(amt_ptr, pkt->src_at_addr, elapp); if (amt_ptr == NULL) { @@ -712,10 +703,7 @@ at_ifaddr_t *elapp; NEW_AMT(amt_ptr, pkt->src_at_addr,elapp); if (amt_ptr->m) - { - ATENABLE(s, arpinp_lock); - return(0); /* no non-busy slots available in the cache */ - } + return(0); /* no non-busy slots available in the cache */ amt_ptr->dest_at_addr = pkt->src_at_addr; amt_ptr->dest_at_addr.atalk_unused = 0; @@ -728,7 +716,6 @@ at_ifaddr_t *elapp; amt_ptr->dest_addr = pkt->src_addr; if (FDDI_OR_TOKENRING(elapp->aa_ifp->if_type)) ddp_bit_reverse(&amt_ptr->dest_addr); - ATENABLE(s, arpinp_lock); return(1); } @@ -741,9 +728,7 @@ at_ifaddr_t *elapp; StaticProc int aarp_delete_amt_info(amt_ptr) register aarp_amt_t *amt_ptr; { - register s; register gbuf_t *m; - ATDISABLE(s, arpinp_lock); amt_ptr->last_time = 0; ATALK_ASSIGN(amt_ptr->dest_at_addr, 0, 0, 0); amt_ptr->no_of_retries = 0; @@ -751,11 +736,8 @@ register aarp_amt_t *amt_ptr; if (amt_ptr->m) { m = amt_ptr->m; amt_ptr->m = NULL; - ATENABLE(s, arpinp_lock); gbuf_freel(m); - } - else - ATENABLE(s, arpinp_lock); + } return(0); } @@ -794,8 +776,8 @@ StaticProc void aarp_build_pkt(pkt, elapp) register aarp_pkt_t *pkt; at_ifaddr_t *elapp; { - pkt->hardware_type = AARP_ETHER_HW_TYPE; - pkt->stack_type = AARP_AT_PROTO; + pkt->hardware_type = htons(AARP_ETHER_HW_TYPE); + pkt->stack_type = htons(AARP_AT_PROTO); pkt->hw_addr_len = ETHERNET_ADDR_LEN; pkt->stack_addr_len = AARP_AT_ADDR_LEN; bcopy(elapp->xaddr, pkt->src_addr.etalk_addr_octet, sizeof(elapp->xaddr)); @@ -811,7 +793,7 @@ StaticProc void aarp_build_pkt(pkt, elapp) StaticProc void aarp_sched_req(arg) void *arg; { - int s, i; + int i; aarp_amt_t *amt_ptr = (aarp_amt_t *)arg; atalk_lock(); @@ -829,21 +811,16 @@ StaticProc void aarp_sched_req(arg) /* * found match - pointer is valid */ - ATDISABLE(s, arpinp_lock); if (amt_ptr->tmo == 0) { - ATENABLE(s, arpinp_lock); atalk_unlock(); return; } if (amt_ptr->no_of_retries < AARP_MAX_REQ_RETRIES) { 
- ATENABLE(s, arpinp_lock); if (aarp_send_req(amt_ptr) == 0) { atalk_unlock(); return; } - ATDISABLE(s, arpinp_lock); } - ATENABLE(s, arpinp_lock); aarp_delete_amt_info(amt_ptr); break; } @@ -977,6 +954,8 @@ snmpAarpEnt_t *getAarp(elapId) aarp_amt_t *amtp; static snmpAarpEnt_t snmp[AMTSIZE]; snmpAarpEnt_t *snmpp; + struct atalk_addr addr; + u_short tmp_net; if (*elapId <0 || *elapId >= IF_TOTAL_MAX) @@ -993,7 +972,11 @@ snmpAarpEnt_t *getAarp(elapId) * & etalk_addr positions in the aarp_amt_t struct * has not changed and copy both at once */ - bcopy(&amtp->dest_at_addr, &snmpp->ap_ddpAddr, ENTRY_SIZE); + addr.atalk_unused = 0; + tmp_net = UAS_VALUE(amtp->dest_at_addr.atalk_net); + NET_ASSIGN(addr.atalk_net, tmp_net); + addr.atalk_node = amtp->dest_at_addr.atalk_node; + bcopy(&addr, &snmpp->ap_ddpAddr, ENTRY_SIZE); snmpp++; cnt++; diff --git a/bsd/netat/ddp_lap.c b/bsd/netat/ddp_lap.c index 49e20d573..e4c40d696 100644 --- a/bsd/netat/ddp_lap.c +++ b/bsd/netat/ddp_lap.c @@ -115,8 +115,6 @@ int xpatcnt = 0; /* externs */ extern TAILQ_HEAD(name_registry, _nve_) name_registry; extern snmpStats_t snmpStats; -extern atlock_t ddpinp_lock; -extern atlock_t arpinp_lock; extern short appletalk_inited; extern int adspInited; extern struct atpcb ddp_head; @@ -855,7 +853,7 @@ elap_dataput(m, elapp, addr_flag, addr) * it doesn't know net#, consequently can't do * AMT_LOOKUP. That task left to aarp now. 
*/ - error = aarp_send_data(m,elapp,&dest_at_addr, loop); + error = aarp_send_data(m, elapp, &dest_at_addr, loop); break; case ET_ADDR : error = pat_output(elapp, m, &dest_addr, 0); @@ -1026,7 +1024,6 @@ void elap_offline(elapp) { void zip_sched_getnetinfo(); /* forward reference */ int errno; - int s; dPrintf(D_M_ELAP, D_L_SHUTDN_INFO, ("elap_offline:%s\n", elapp->ifName)); if (elapp->ifState != LAP_OFFLINE) { @@ -1040,11 +1037,9 @@ void elap_offline(elapp) (void)at_unreg_mcast(elapp, (caddr_t)&elapp->cable_multicast_addr); elapp->ifState = LAP_OFFLINE; - ATDISABLE(s, ddpinp_lock); if (MULTIPORT_MODE) RT_DELETE(elapp->ifThisCableEnd, elapp->ifThisCableStart); - ATENABLE(s, ddpinp_lock); /* make sure no zip timeouts are left running */ elapp->ifGNIScheduled = 0; @@ -1097,7 +1092,7 @@ int ddp_shutdown(count_only) CCB *sp, *sp_next; gref_t *gref; vm_offset_t temp_rcb_data, temp_state_data; - int i, s, active_skts = 0; /* count of active pids for non-socketized + int i, active_skts = 0; /* count of active pids for non-socketized AppleTalk protocols */ /* Network is shutting down... send error messages up on each open @@ -1106,8 +1101,6 @@ int ddp_shutdown(count_only) sockets, but return EBUSY and don't complete shutdown. 
*** */ - s = splimp(); /* *** previously contained mismatched locking - that was ifdef'ed to splimp() *** */ if (!count_only) nbp_shutdown(); /* clear all known NVE */ @@ -1193,11 +1186,9 @@ int ddp_shutdown(count_only) atalk_notify(gref, ESHUTDOWN); } } - if (count_only) { - splx(s); + if (count_only) return(active_skts); - } /* if there are no interfaces in the process of going online, continue shutting down DDP */ for (i = 0; i < IF_TOTAL_MAX; i++) { if (at_interfaces[i].startup_inprogress == TRUE) @@ -1249,7 +1240,6 @@ int ddp_shutdown(count_only) } ddp_start(); - splx(s); return(0); } /* ddp_shutdown */ @@ -1310,11 +1300,9 @@ void ZIPwakeup(elapp, ZipError) at_ifaddr_t *elapp; int ZipError; { - int s, error = ZipError; + int error = ZipError; - ATDISABLE(s, ddpinp_lock); if ( (elapp != NULL) && elapp->startup_inprogress) { - ATENABLE(s, ddpinp_lock); /* was ZIPContinue */ /* was elapp_online() with jump to ZIP_sleep */ @@ -1349,21 +1337,17 @@ void ZIPwakeup(elapp, ZipError) dPrintf(D_M_ELAP, D_L_STARTUP_INFO, ("elap_online: ifZipError=%d\n", error)); } - } else - ATENABLE(s, ddpinp_lock); + } } /* ZIPwakeup */ void AARPwakeup(probe_cb) aarp_amt_t *probe_cb; { - int s; int errno; at_ifaddr_t *elapp; - ATDISABLE(s, arpinp_lock); elapp = probe_cb->elapp; if ( (elapp != NULL) && elapp->startup_inprogress && elapp->aa_ifp != 0) { - ATENABLE(s, arpinp_lock); /* was AARPContinue */ errno = aarp_init2(elapp); @@ -1385,8 +1369,7 @@ void AARPwakeup(probe_cb) ("elap_online: aarp_init returns zero\n")); elap_online2(elapp); } - } else - ATENABLE(s, arpinp_lock); + } } /* AARPwakeup */ void ddp_bit_reverse(addr) diff --git a/bsd/netat/ddp_nbp.c b/bsd/netat/ddp_nbp.c index db4629db5..dabcee8c5 100644 --- a/bsd/netat/ddp_nbp.c +++ b/bsd/netat/ddp_nbp.c @@ -76,7 +76,6 @@ extern at_ifaddr_t *ifID_home; TAILQ_HEAD(name_registry, _nve_) name_registry; -atlock_t nve_lock; /* statics */ static int errno; @@ -117,14 +116,12 @@ void nbp_shutdown() /* delete all NVE's and release 
buffers */ register nve_entry_t *nve_entry, *nve_next; - ATDISABLE(nve_lock_pri,NVE_LOCK); - for ((nve_entry = TAILQ_FIRST(&name_registry)); nve_entry; nve_entry = nve_next) { - nve_next = TAILQ_NEXT(nve_entry, nve_link); + for ((nve_entry = TAILQ_FIRST(&name_registry)); nve_entry; nve_entry = nve_next) { + nve_next = TAILQ_NEXT(nve_entry, nve_link); /* NB: nbp_delete_entry calls TAILQ_REMOVE */ nbp_delete_entry(nve_entry); } - ATENABLE(nve_lock_pri,NVE_LOCK); if (lzones) { gbuf_freem(lzones); @@ -403,7 +400,7 @@ void nbp_input(m, ifID) ("nbp_input: BRREQ: src changed to %d.%d.%d\n", ifID->ifThisNode.s_net, ifID->ifThisNode.s_node, ourSkt)); - nbp->tuple[0].enu_addr.net = ifID->ifThisNode.s_net; + nbp->tuple[0].enu_addr.net = htons(ifID->ifThisNode.s_net); nbp->tuple[0].enu_addr.node = ifID->ifThisNode.s_node; nbp->tuple[0].enu_addr.socket = ourSkt; ddp->src_socket = NBP_SOCKET; @@ -499,7 +496,7 @@ static int nbp_validate_n_hash (nbp_req, wild_ok, checkLocal) #ifdef COMMENTED_OUT { int net,node,skt; - net = tuple->enu_addr.net; + net = ntohs(tuple->enu_addr.net); node = tuple->enu_addr.node; skt = tuple->enu_addr.socket; dPrintf(D_M_NBP_LOW,D_L_USR4, @@ -714,7 +711,6 @@ static nve_entry_t *nbp_search_nve (nbp_req, ifID) (u_int) nbp_req->response)); } #endif /* NBP_DEBUG */ - ATDISABLE(nve_lock_pri,NVE_LOCK); TAILQ_FOREACH(nve_entry, &name_registry, nve_link) { if ((nbp_req->nve.zone_hash) && ((nbp_req->nve.zone_hash != @@ -800,15 +796,11 @@ static nve_entry_t *nbp_search_nve (nbp_req, ifID) if (nbp_req->func != NULL) { if ((*(nbp_req->func))(nbp_req, nve_entry) != 0) { /* errno expected to be set by func */ - ATENABLE(nve_lock_pri,NVE_LOCK); return (NULL); } - } else { - ATENABLE(nve_lock_pri,NVE_LOCK); + } else return (nve_entry); - } } - ATENABLE(nve_lock_pri,NVE_LOCK); errno = 0; return (NULL); @@ -848,7 +840,9 @@ register nve_entry_t *nve_entry; * tuple we want to write. Write it! 
*/ tuple = (at_nbptuple_t *)gbuf_wptr(nbp_req->response); - tuple->enu_addr = nve_entry->address; + tuple->enu_addr.net = htons(nve_entry->address.net); + tuple->enu_addr.node = nve_entry->address.node; + tuple->enu_addr.socket = nve_entry->address.socket; tuple->enu_enum = nve_entry->enumerator; /* tuple is in the compressed (no "filler") format */ @@ -969,7 +963,7 @@ register nbp_req_t *nbp_req; case NBP_LKUP : ddp->dst_socket = nbp_req->nve.address.socket; ddp->dst_node = nbp_req->nve.address.node; - NET_ASSIGN(ddp->dst_net, nbp_req->nve.address.net); + NET_ASSIGN_NOSWAP(ddp->dst_net, nbp_req->nve.address.net); nbp->control = NBP_LKUP_REPLY; break; } @@ -1047,10 +1041,8 @@ getNbpTableSize() register nve_entry_t *nve; register int i=0; - ATDISABLE(nve_lock_pri,NVE_LOCK); for (nve = TAILQ_FIRST(&name_registry); nve; nve = TAILQ_NEXT(nve, nve_link), i++) i++; - ATENABLE(nve_lock_pri,NVE_LOCK); return(i); } @@ -1073,7 +1065,6 @@ getNbpTable(p, s, c) else nve = TAILQ_FIRST(&name_registry); - ATDISABLE(nve_lock_pri,NVE_LOCK); for ( ; nve && c ; nve = TAILQ_NEXT(nve, nve_link), p++,i++) { if (i>= s) { p->nbpe_object = nve->object; @@ -1081,7 +1072,6 @@ getNbpTable(p, s, c) c--; } } - ATENABLE(nve_lock_pri,NVE_LOCK); if (nve) { next = nve; nextNo = i; @@ -1323,7 +1313,6 @@ nve_entry_t *nbp_find_nve(nve) { register nve_entry_t *nve_entry; - ATDISABLE(nve_lock_pri,NVE_LOCK); TAILQ_FOREACH(nve_entry, &name_registry, nve_link) { if (nve->zone_hash && ((nve->zone_hash != nve_entry->zone_hash) && @@ -1343,10 +1332,8 @@ nve_entry_t *nbp_find_nve(nve) continue; /* Found a match! 
*/ - ATENABLE(nve_lock_pri,NVE_LOCK); return (nve_entry); } - ATENABLE(nve_lock_pri,NVE_LOCK); return (NULL); } /* nbp_find_nve */ @@ -1357,22 +1344,19 @@ static int nbp_enum_gen (nve_entry) register int new_enum = 0; register nve_entry_t *ne; - ATDISABLE(nve_lock_pri,NVE_LOCK); re_do: TAILQ_FOREACH(ne, &name_registry, nve_link) { if ((*(int *)&ne->address == *(int *)&nve_entry->address) && (ne->enumerator == new_enum)) { - if (new_enum == 255) { - ATENABLE(nve_lock_pri,NVE_LOCK); + if (new_enum == 255) return(EADDRNOTAVAIL); - } else { + else { new_enum++; goto re_do; } } } - ATENABLE(nve_lock_pri,NVE_LOCK); nve_entry->enumerator = new_enum; return (0); } @@ -1425,9 +1409,7 @@ int nbp_new_nve_entry(nve_entry, ifID) new_entry->tag = tag; new_entry->pid = proc_selfpid(); - ATDISABLE(nve_lock_pri,NVE_LOCK); TAILQ_INSERT_TAIL(&name_registry, new_entry, nve_link); - ATENABLE(nve_lock_pri,NVE_LOCK); at_state.flags |= AT_ST_NBP_CHANGED; #ifdef NBP_DEBUG diff --git a/bsd/netat/ddp_r_rtmp.c b/bsd/netat/ddp_r_rtmp.c index 438d9220f..6f877956b 100644 --- a/bsd/netat/ddp_r_rtmp.c +++ b/bsd/netat/ddp_r_rtmp.c @@ -92,7 +92,6 @@ extern int elap_online3(); extern pktsIn, pktsOut, pktsDropped, pktsHome; extern short ErrorRTMPoverflow, ErrorZIPoverflow; -extern atlock_t ddpinp_lock; extern lck_mtx_t * atalk_mutex; /* @@ -677,9 +676,11 @@ static void rtmp_update(ifID, rtmp, tuple_nb) ("rtmp_update: Shorter route found %d-%d, update\n", NewRoute.NetStart, NewRoute.NetStop)); +#ifdef AURP_SUPPORT if (ddp_AURPsendx && (aurp_ifID->ifFlags & AT_IFF_AURP)) ddp_AURPsendx(AURPCODE_RTUPDATE, (void *)&NewRoute, AURPEV_NetDistChange); +#endif } } else { /* no entry found */ @@ -700,10 +701,11 @@ static void rtmp_update(ifID, rtmp, tuple_nb) NewRoute.NextIRNode, NewRoute.NetDist, NewRoute.NetPort, NewRoute.EntryState) == (RT_entry *)NULL) ErrorRTMPoverflow = 1; - +#ifdef AURP_SUPPORT else if (ddp_AURPsendx && (aurp_ifID->ifFlags & AT_IFF_AURP)) ddp_AURPsendx(AURPCODE_RTUPDATE, (void 
*)&NewRoute, AURPEV_NetAdded); +#endif } } @@ -738,7 +740,6 @@ void rtmp_timeout(ifID) register at_ifaddr_t *ifID; { register u_char state; - register unsigned int s; short i; RT_entry *en = &RT_table[0]; @@ -754,7 +755,6 @@ register at_ifaddr_t *ifID; if (ifID->ifRouterState > NO_ROUTER) ifID->ifRouterState--; - ATDISABLE(s, ddpinp_lock); for (i = 0 ; i < RT_maxentry; i++,en++) { /* we want to age "learned" nets, not directly connected ones */ @@ -778,10 +778,11 @@ register at_ifaddr_t *ifID; dPrintf(D_M_RTMP, D_L_INFO, ("rtmp_timeout: Bad State for %d-%d (e#%d): remove\n", en->NetStart, en->NetStop, i)); - +#ifdef AURP_SUPPORT if (ddp_AURPsendx && (aurp_ifID->ifFlags & AT_IFF_AURP)) ddp_AURPsendx(AURPCODE_RTUPDATE, (void *)en, AURPEV_NetDeleted); +#endif /* then clear the bit in the table concerning this entry. If the zone Count reaches zero, remove the entry */ @@ -797,7 +798,6 @@ register at_ifaddr_t *ifID; } } } - ATENABLE(s, ddpinp_lock); timeout(rtmp_timeout, (caddr_t) ifID, 20*SYS_HZ); atalk_unlock(); @@ -986,7 +986,6 @@ static int rtmp_send_table(ifID, DestNet, DestNode, split_hz, socket, short size,status ; register at_ddp_t *ddp; register short EntNb = 0, sent_tuple = 0; - register unsigned int s; if (ifID->ifRoutingState < PORT_ONLINE) { dPrintf(D_M_RTMP, D_L_INFO, @@ -1011,7 +1010,6 @@ static int rtmp_send_table(ifID, DestNet, DestNode, split_hz, socket, ddp = (at_ddp_t *)(gbuf_rptr(m)); Buff_ptr = (char *)((char *)ddp + DDP_X_HDR_SIZE + 10); - ATDISABLE(s, ddpinp_lock); while (EntNb < RT_maxentry) { if (Entry->NetStop && ((Entry->EntryState & 0x0F) >= RTE_STATE_SUSPECT)) { @@ -1044,7 +1042,6 @@ static int rtmp_send_table(ifID, DestNet, DestNode, split_hz, socket, if (size > (DDP_DATA_SIZE-20)) { DDPLEN_ASSIGN(ddp, size + DDP_X_HDR_SIZE + 10); gbuf_winc(m,size); - ATENABLE(s, ddpinp_lock); if (status = ddp_router_output(m, ifID, AT_ADDR, NET_VALUE(DestNet),DestNode, 0)){ dPrintf(D_M_RTMP, D_L_WARNING, @@ -1066,13 +1063,11 @@ static int 
rtmp_send_table(ifID, DestNet, DestNode, split_hz, socket, sent_tuple, ifID->ifPort)); sent_tuple = 0; size = 0; - ATDISABLE(s, ddpinp_lock); } Entry++; EntNb++; } - ATENABLE(s, ddpinp_lock); /* * If we have some remaining entries to send, send them now. @@ -1630,10 +1625,9 @@ void rtmp_purge(ifID) at_ifaddr_t *ifID; { u_char state; - int i, s; + int i; RT_entry *en = &RT_table[0]; - ATDISABLE(s, ddpinp_lock); for (i=0; i < RT_maxentry; i++) { state = en->EntryState & 0x0F; if ((state > RTE_STATE_UNUSED) && (state != RTE_STATE_PERMANENT) @@ -1643,5 +1637,4 @@ void rtmp_purge(ifID) } en++; } - ATENABLE(s, ddpinp_lock); } diff --git a/bsd/netat/ddp_r_zip.c b/bsd/netat/ddp_r_zip.c index bde37beb8..890c8a374 100644 --- a/bsd/netat/ddp_r_zip.c +++ b/bsd/netat/ddp_r_zip.c @@ -440,7 +440,7 @@ static void zip_send_reply_to_query(mreceived, ifID) /* access the number of nets requested in the Query */ network_count = *((char *)(ddp_received->data) + 1); - NetAsked = (u_short *)(ddp_received->data+ 2); + NetAsked = (u_short *)(ddp_received->data + 2); /* check the validity of the Query packet */ @@ -478,7 +478,7 @@ static void zip_send_reply_to_query(mreceived, ifID) reply_length = 2; /* 1st byte is ZIP reply code, 2nd is network count */ for (i = 0 ; i < network_count ; i ++, NetAsked++) { - Entry = rt_blookup(*NetAsked); + Entry = rt_blookup(ntohs(*NetAsked)); if (Entry != NULL && ((Entry->EntryState & 0x0F) >= RTE_STATE_SUSPECT) && RT_ALL_ZONES_KNOWN(Entry)) { /* this net is well known... 
*/ @@ -541,7 +541,7 @@ static void zip_send_reply_to_query(mreceived, ifID) * and build a separate packet for each extended network requested */ - zip_send_ext_reply_to_query(mreceived, ifID, Entry, *NetAsked); + zip_send_ext_reply_to_query(mreceived, ifID, Entry, ntohs(*NetAsked)); } } @@ -579,7 +579,7 @@ void zip_router_input (m, ifID) register at_ddp_t *ddp; register at_atp_t *atp; register at_zip_t *zip; - register u_long user_bytes; + u_char user_bytes[4]; register u_short user_byte; /* variables for ZipNotify processing */ @@ -762,8 +762,8 @@ void zip_router_input (m, ifID) /* Get the user bytes in network order */ - user_bytes = UAL_VALUE(atp->user_bytes); - user_byte = user_bytes >> 24; /* Get the zeroth byte */ + *((u_long*)user_bytes) = UAL_VALUE(atp->user_bytes); + user_byte = user_bytes[0]; /* Get the zeroth byte */ dPrintf(D_M_ZIP, D_L_INPUT, ("zip_input: received a ZIP_ATP command=%d\n", user_byte)); @@ -1152,7 +1152,7 @@ int zip_type_packet (m) register at_atp_t *atp; register at_ddp_t *ddp; register at_zip_t *zip; - register u_long user_bytes; + u_char user_bytes[4]; register int user_byte; ddp = (at_ddp_t *)gbuf_rptr(m); @@ -1171,8 +1171,8 @@ int zip_type_packet (m) else atp = (at_atp_t *)(gbuf_rptr(gbuf_cont(m))); /* Get the user bytes in network order */ - user_bytes = UAL_VALUE(atp->user_bytes); - user_byte = user_bytes >> 24; /* Get the zeroth byte */ + *((u_long*)user_bytes) = UAL_VALUE(atp->user_bytes); + user_byte = user_bytes[0]; /* Get the zeroth byte */ if ((user_byte == ZIP_GETMYZONE) || (user_byte == ZIP_GETZONELIST) || (user_byte == ZIP_GETLOCALZONES)) @@ -1258,8 +1258,7 @@ int zip_handle_getmyzone(ifID, m) r_atp->bitmap = 0; UAS_UAS(r_atp->tid, atp->tid); ulongtmp = 1; - ulongtmp = htonl(ulongtmp); - UAL_ASSIGN(r_atp->user_bytes, ulongtmp); /* no of zones */ + UAL_ASSIGN_HTON(r_atp->user_bytes, ulongtmp); /* no of zones */ /* fill up atp data part */ bcopy((caddr_t) &ifID->ifZoneName, (caddr_t) r_atp->data, ifID->ifZoneName.len+1); @@ 
-1473,7 +1472,7 @@ zip_reply_received(m, ifID, reply_type) /* access the number of nets provided in the ZIP Reply */ - network_count = *(u_char *)(gbuf_rptr(m) + DDP_X_HDR_SIZE + 1); + network_count = ntohs(*(u_char *)(gbuf_rptr(m) + DDP_X_HDR_SIZE + 1)); PacketPtr = (char *)(gbuf_rptr(m) + DDP_X_HDR_SIZE + 2); @@ -1485,7 +1484,7 @@ zip_reply_received(m, ifID, reply_type) while (payload_len > 0 && network_count >0) { - Network = *(at_net_al *)PacketPtr; + Network = ntohs(*(at_net_al *)PacketPtr); PacketPtr += 2; zname = (at_nvestr_t *)PacketPtr; if (payload_len) @@ -1615,8 +1614,7 @@ static void zip_reply_to_getmyzone (ifID, m) r_atp->bitmap = 0; UAS_UAS(r_atp->tid, atp->tid); ulongtmp = 1; - ulongtmp = htonl(ulongtmp); - UAL_ASSIGN(r_atp->user_bytes, ulongtmp); /* no of zones */ + UAL_ASSIGN_HTON(r_atp->user_bytes, ulongtmp); /* no of zones */ data_ptr = (char *)r_atp->data; @@ -1713,7 +1711,7 @@ zip_reply_to_getzonelist (ifID, m) /* get the start index from the ATP request */ - StartPoint = (UAL_VALUE(atp->user_bytes) & 0xffff) -1; + StartPoint = (UAL_VALUE_NTOH(atp->user_bytes) & 0xffff) -1; /* find the next zone to send */ @@ -1754,7 +1752,7 @@ zip_reply_to_getzonelist (ifID, m) ulongtmp += 0x01000000; - UAL_ASSIGN(r_atp->user_bytes, ulongtmp); /* # of zones and flag*/ + UAL_ASSIGN_HTON(r_atp->user_bytes, ulongtmp); /* # of zones and flag*/ size = DDP_X_HDR_SIZE + ATP_HDR_SIZE + PacketLen; gbuf_winc(rm,size); @@ -1825,7 +1823,7 @@ int zip_reply_to_getlocalzones (ifID, m) /* get the start index from the ATP request */ - Index_wanted = (UAL_VALUE(atp->user_bytes) & 0xffff) -1; + Index_wanted = (UAL_VALUE_NTOH(atp->user_bytes) & 0xffff) -1; dPrintf(D_M_ZIP_LOW, D_L_INFO, ("zip_r_GLZ: for station %d:%d Index_wanted = %d\n", @@ -1953,7 +1951,7 @@ int zip_reply_to_getlocalzones (ifID, m) r_atp->bitmap = 0; UAS_UAS(r_atp->tid, atp->tid); ulongtmp = ((last_flag << 24) & 0xFF000000) + ZonesInPacket; /* # of zones and flag*/ - UAL_ASSIGN(r_atp->user_bytes, ulongtmp); + 
UAL_ASSIGN_HTON(r_atp->user_bytes, ulongtmp); size = DDP_X_HDR_SIZE + ATP_HDR_SIZE + packet_len; gbuf_winc(rm,size); DDPLEN_ASSIGN(r_ddp, size); diff --git a/bsd/netat/ddp_rtmptable.c b/bsd/netat/ddp_rtmptable.c index aaefa9ae8..933c07649 100644 --- a/bsd/netat/ddp_rtmptable.c +++ b/bsd/netat/ddp_rtmptable.c @@ -86,7 +86,6 @@ char errstr[512]; /* used to display meaningfull router errors*/ extern at_ifaddr_t *ifID_table[]; extern at_ifaddr_t *ifID_home; extern snmpStats_t snmpStats; -extern atlock_t ddpinp_lock; short ErrorRTMPoverflow = 0; /* flag if RTMP table is too small for this net */ short ErrorZIPoverflow = 0; /* flag if ZIP table is too small for this net */ @@ -146,12 +145,10 @@ at_net_al NetNumber; RT_entry *ptree = &RT_table_start; at_net_al LowEnd; - register unsigned int s; /* dPrintf(D_M_RTMP_LOW, D_L_ROUTING, ("%s : Lookup for Net=%d\n", "rt_blookup", NetNumber)); */ - ATDISABLE(s, ddpinp_lock); while (ptree) { if (NetNumber > ptree->NetStop) { @@ -176,7 +173,6 @@ at_net_al NetNumber; ptree = ptree->left; continue; } - ATENABLE(s, ddpinp_lock); /* we're in the range (either extended or not) * return the entry found. @@ -190,7 +186,6 @@ at_net_al NetNumber; return (ptree); } } - ATENABLE(s, ddpinp_lock); dPrintf(D_M_RTMP_LOW, D_L_ROUTING, ("%s : %04d : NOT FOUND\n", "rt_blookup", NetNumber)); @@ -496,23 +491,19 @@ int zt_add_zonename(zname) at_nvestr_t *zname; { register short res,i; - register unsigned int s; if (res = zt_find_zname(zname)) return(res); - ATDISABLE(s, ddpinp_lock); for (i = 0; i < ZT_maxentry ; i++) { if (ZT_table[i].ZoneCount == 0 && ZT_table[i].Zone.len == 0) {/* free entry */ ZT_table[i].Zone = *zname; dPrintf(D_M_RTMP, D_L_VERBOSE, ("zt_add_zonename: zone #%d %s len=%d\n", i, ZT_table[i].Zone.str, ZT_table[i].Zone.len)); at_state.flags |= AT_ST_ZT_CHANGED; - ATENABLE(s, ddpinp_lock); return(i+1); } } - ATENABLE(s, ddpinp_lock); /* table full... 
*/ return (ZT_MAXEDOUT); } @@ -707,14 +698,10 @@ RT_entry *ent; register u_char *zmap; register u_short i,j; register int zone_count = 0 ; - register unsigned int s; - ATDISABLE(s, ddpinp_lock); - if (!RT_ALL_ZONES_KNOWN(ent)) { - ATENABLE(s, ddpinp_lock); + if (!RT_ALL_ZONES_KNOWN(ent)) return (0); - } zmap = ent->ZoneBitMap; for (i = 0 ; i < ZT_BYTES ; i++) { @@ -727,7 +714,6 @@ RT_entry *ent; zmap++; } - ATENABLE(s, ddpinp_lock); return (zone_count); } @@ -739,13 +725,11 @@ at_nvestr_t *zname; { register short i, j, found; register char c1, c2; - register unsigned int s; if (!zname->len) return(0); - ATDISABLE(s, ddpinp_lock); for (i = 0 ; i < ZT_maxentry ; i++) { if (!ZT_table[i].ZoneCount || zname->len != ZT_table[i].Zone.len) continue; @@ -769,13 +753,10 @@ at_nvestr_t *zname; } } - if (found) { - ATENABLE(s, ddpinp_lock); + if (found) return (i+1); - } } - ATENABLE(s, ddpinp_lock); return(0); } @@ -788,14 +769,11 @@ void zt_set_zmap(znum, zmap) char *zmap; { register u_short num = znum -1; - register unsigned int s; - ATDISABLE(s, ddpinp_lock); if (!(zmap[num >> 3] & 0x80 >> (num % 8))) { zmap[num >> 3] |= 0x80 >> (num % 8); ZT_table[num].ZoneCount++; } - ATENABLE(s, ddpinp_lock); } @@ -807,14 +785,11 @@ void zt_clr_zmap(znum, zmap) char *zmap; { register u_short num = znum -1; - register unsigned int s; - ATDISABLE(s, ddpinp_lock); if (zmap[num >> 3] & 0x80 >> (num % 8)) { zmap[num >> 3] ^= 0x80 >> (num % 8); ZT_table[num].ZoneCount--; } - ATENABLE(s, ddpinp_lock); } diff --git a/bsd/netat/ddp_sip.c b/bsd/netat/ddp_sip.c index 766eabace..b4cef944b 100644 --- a/bsd/netat/ddp_sip.c +++ b/bsd/netat/ddp_sip.c @@ -114,7 +114,7 @@ void sip_input(mp, ifID) /* assuming that the whole packet is in one contiguous buffer */ atp = (at_atp_t *)ddp->data; - switch(UAL_VALUE(atp->user_bytes)) { + switch(UAL_VALUE_NTOH(atp->user_bytes)) { case SIP_SYSINFO_CMD : /* Sending a response with "AppleTalk driver version" (u_short) * followed by 14 zeros will pacify the interpoll. 
@@ -132,11 +132,11 @@ void sip_input(mp, ifID) else resp = (u_char *)gbuf_rptr(tmp); bzero(resp, 16); - *(u_short *)resp = SIP_DRIVER_VERSION; + *(u_short *)resp = htons(SIP_DRIVER_VERSION); ubytes.response = SIP_GOOD_RESPONSE; ubytes.unused = 0; - ubytes.responder_version = SIP_RESPONDER_VERSION; + ubytes.responder_version = htons(SIP_RESPONDER_VERSION); break; case SIP_DATALINK_CMD : /* In this case, the magic spell is to send 2 zeroes after @@ -153,23 +153,23 @@ void sip_input(mp, ifID) else resp = (u_char *)gbuf_rptr(tmp); bzero(resp, 16); - *(u_short *)resp = SIP_DRIVER_VERSION; + *(u_short *)resp = htons(SIP_DRIVER_VERSION); ubytes.response = SIP_GOOD_RESPONSE; ubytes.unused = 0; - ubytes.responder_version = SIP_RESPONDER_VERSION; + ubytes.responder_version = htons(SIP_RESPONDER_VERSION); break; default : /* bad request, send a bad command response back */ ubytes.response = SIP_BAD_RESPONSE; ubytes.unused = 0; - ubytes.responder_version = SIP_RESPONDER_VERSION; + ubytes.responder_version = htons(SIP_RESPONDER_VERSION); } NET_NET(ddp->dst_net, ddp->src_net); ddp->dst_node = ddp->src_node; ddp->dst_socket = ddp->src_socket; - bcopy((caddr_t) &ubytes, (caddr_t) atp->user_bytes, sizeof(ubytes)); + UAL_ASSIGN_HTON(atp->user_bytes, &ubytes); atp->cmd = ATP_CMD_TRESP; atp->eom = 1; atp->sts = 0; diff --git a/bsd/netat/ddp_usrreq.c b/bsd/netat/ddp_usrreq.c index 9331419cb..49f9ae768 100644 --- a/bsd/netat/ddp_usrreq.c +++ b/bsd/netat/ddp_usrreq.c @@ -80,7 +80,7 @@ int ddp_pru_control(struct socket *so, u_long cmd, caddr_t data, int ddp_pru_attach(struct socket *so, int proto, struct proc *p) { - int s, error = 0; + int error = 0; at_ddp_t *ddp = NULL; struct atpcb *pcb = (struct atpcb *)((so)->so_pcb); @@ -88,9 +88,7 @@ int ddp_pru_attach(struct socket *so, int proto, if (error != 0) return error; - s = splnet(); error = at_pcballoc(so, &ddp_head); - splx(s); if (error) return error; pcb = (struct atpcb *)((so)->so_pcb); @@ -105,7 +103,7 @@ int ddp_pru_attach(struct 
socket *so, int proto, int ddp_pru_disconnect(struct socket *so) { - int s, error = 0; + int error = 0; at_ddp_t *ddp = NULL; struct atpcb *pcb = (struct atpcb *)((so)->so_pcb); @@ -116,9 +114,7 @@ int ddp_pru_disconnect(struct socket *so) return ENOTCONN; soisdisconnected(so); - s = splnet(); at_pcbdetach(pcb); - splx(s); return error; } @@ -126,31 +122,25 @@ int ddp_pru_disconnect(struct socket *so) int ddp_pru_abort(struct socket *so) { - int s; struct atpcb *pcb = (struct atpcb *)((so)->so_pcb); if (pcb == NULL) return (EINVAL); soisdisconnected(so); - s = splnet(); at_pcbdetach(pcb); - splx(s); return 0; } int ddp_pru_detach(struct socket *so) { - int s; struct atpcb *pcb = (struct atpcb *)((so)->so_pcb); if (pcb == NULL) return (EINVAL); - s = splnet(); at_pcbdetach(pcb); - splx(s); return 0; } @@ -223,8 +213,8 @@ int ddp_pru_send(struct socket *so, int flags, struct mbuf *m, } } if (ddp) { - ddp->length = m->m_pkthdr.len; - UAS_ASSIGN(ddp->checksum, + DDPLEN_ASSIGN(ddp, m->m_pkthdr.len); + UAS_ASSIGN_HTON(ddp->checksum, (pcb->ddp_flags & DDPFLG_CHKSUM)? 1: 0); ddp->type = (pcb->ddptype)? 
pcb->ddptype: DEFAULT_OT_DDPTYPE; #ifdef NOT_YET @@ -253,8 +243,8 @@ int ddp_pru_send(struct socket *so, int flags, struct mbuf *m, NET_ASSIGN(ddp->src_net, ifID->ifThisNode.s_net); ddp->src_node = ifID->ifThisNode.s_node; ddp->src_socket = pcb->lport; - if (UAS_VALUE(ddp->checksum)) - UAS_ASSIGN(ddp->checksum, ddp_checksum(m, 4)); + if (UAS_VALUE_NTOH(ddp->checksum)) + UAS_ASSIGN_HTON(ddp->checksum, ddp_checksum(m, 4)); ddp_input(n, ifID); } } @@ -264,7 +254,6 @@ int ddp_pru_send(struct socket *so, int flags, struct mbuf *m, int ddp_pru_sockaddr(struct socket *so, struct sockaddr **nam) { - int s; struct atpcb *pcb; struct sockaddr_at *sat; @@ -273,9 +262,7 @@ int ddp_pru_sockaddr(struct socket *so, return(ENOMEM); bzero((caddr_t)sat, sizeof(*sat)); - s = splnet(); if ((pcb = sotoatpcb(so)) == NULL) { - splx(s); FREE(sat, M_SONAME); return(EINVAL); } @@ -284,7 +271,6 @@ int ddp_pru_sockaddr(struct socket *so, sat->sat_len = sizeof(*sat); sat->sat_port = pcb->lport; sat->sat_addr = pcb->laddr; - splx(s); *nam = (struct sockaddr *)sat; return(0); @@ -294,7 +280,6 @@ int ddp_pru_sockaddr(struct socket *so, int ddp_pru_peeraddr(struct socket *so, struct sockaddr **nam) { - int s; struct atpcb *pcb; struct sockaddr_at *sat; @@ -303,9 +288,7 @@ int ddp_pru_peeraddr(struct socket *so, return (ENOMEM); bzero((caddr_t)sat, sizeof(*sat)); - s = splnet(); if ((pcb = sotoatpcb(so)) == NULL) { - splx(s); FREE(sat, M_SONAME); return(EINVAL); } @@ -314,7 +297,6 @@ int ddp_pru_peeraddr(struct socket *so, sat->sat_len = sizeof(*sat); sat->sat_port = pcb->rport; sat->sat_addr = pcb->raddr; - splx(s); *nam = (struct sockaddr *)sat; return(0); diff --git a/bsd/netat/drv_dep.c b/bsd/netat/drv_dep.c index 8f286db63..61ed827eb 100644 --- a/bsd/netat/drv_dep.c +++ b/bsd/netat/drv_dep.c @@ -76,38 +76,12 @@ struct ifqueue atalkintrq; /* appletalk and aarp packet input queue */ short appletalk_inited = 0; -extern atlock_t - ddpall_lock, ddpinp_lock, arpinp_lock, refall_lock, nve_lock, - 
aspall_lock, asptmo_lock, atpall_lock, atptmo_lock, atpgen_lock; -extern int (*sys_ATsocket )(), (*sys_ATgetmsg)(), (*sys_ATputmsg)(); -extern int (*sys_ATPsndreq)(), (*sys_ATPsndrsp)(); -extern int (*sys_ATPgetreq)(), (*sys_ATPgetrsp)(); void atalk_load() { - extern int _ATsocket(), _ATgetmsg(), _ATputmsg(); - extern int _ATPsndreq(), _ATPsndrsp(), _ATPgetreq(), _ATPgetrsp(); extern lck_mtx_t *domain_proto_mtx; - sys_ATsocket = _ATsocket; - sys_ATgetmsg = _ATgetmsg; - sys_ATputmsg = _ATputmsg; - sys_ATPsndreq = _ATPsndreq; - sys_ATPsndrsp = _ATPsndrsp; - sys_ATPgetreq = _ATPgetreq; - sys_ATPgetrsp = _ATPgetrsp; - - ATLOCKINIT(ddpall_lock); - ATLOCKINIT(ddpinp_lock); - ATLOCKINIT(arpinp_lock); - ATLOCKINIT(refall_lock); - ATLOCKINIT(aspall_lock); - ATLOCKINIT(asptmo_lock); - ATLOCKINIT(atpall_lock); - ATLOCKINIT(atptmo_lock); - ATLOCKINIT(atpgen_lock); - ATLOCKINIT(nve_lock); atp_init(); atp_link(); @@ -128,14 +102,6 @@ void atalk_unload() /* not currently used */ extern gbuf_t *scb_resource_m; extern gbuf_t *atp_resource_m; - sys_ATsocket = 0; - sys_ATgetmsg = 0; - sys_ATputmsg = 0; - sys_ATPsndreq = 0; - sys_ATPsndrsp = 0; - sys_ATPgetreq = 0; - sys_ATPgetrsp = 0; - atp_unlink(); #ifdef NOT_YET @@ -167,7 +133,7 @@ void appletalk_hack_start() int pat_output(patp, mlist, dst_addr, type) at_ifaddr_t *patp; struct mbuf *mlist; /* packet chain */ - unsigned char *dst_addr; + unsigned char *dst_addr; /* for atalk addr - net # must be in network byte order */ int type; { struct mbuf *m, *m1; diff --git a/bsd/netat/nbp.h b/bsd/netat/nbp.h index 9bac6ef0f..648d373a1 100644 --- a/bsd/netat/nbp.h +++ b/bsd/netat/nbp.h @@ -91,8 +91,16 @@ #define NBP_HDR_SIZE 2 typedef struct at_nbp { - unsigned control : 4, - tuple_count : 4; +#if BYTE_ORDER == BIG_ENDIAN + unsigned + control : 4, + tuple_count : 4; +#endif +#if BYTE_ORDER == LITTLE_ENDIAN + unsigned + tuple_count : 4, + control : 4; +#endif u_char at_nbp_id; at_nbptuple_t tuple[NBP_TUPLE_MAX]; } at_nbp_t; diff --git 
a/bsd/netat/sys_dep.c b/bsd/netat/sys_dep.c index 272d890dc..9fa677dce 100644 --- a/bsd/netat/sys_dep.c +++ b/bsd/netat/sys_dep.c @@ -51,13 +51,6 @@ #include #include -int (*sys_ATsocket)() = 0; -int (*sys_ATgetmsg)() = 0; -int (*sys_ATputmsg)() = 0; -int (*sys_ATPsndreq)() = 0; -int (*sys_ATPsndrsp)() = 0; -int (*sys_ATPgetreq)() = 0; -int (*sys_ATPgetrsp)() = 0; extern at_state_t at_state; /* global state of AT network */ extern at_ifaddr_t *ifID_home; /* default interface */ @@ -71,6 +64,12 @@ extern lck_mtx_t * atalk_mutex; #define f_offset f_fglob->fg_offset #define f_data f_fglob->fg_data +extern int _ATsocket(int, int *, void *); +extern int _ATgetmsg(int, strbuf_t *, strbuf_t *, int *, int *, void *); +extern int _ATputmsg(); +extern int _ATPsndreq(), _ATPsndrsp(), _ATPgetreq(), _ATPgetrsp(); + + int ATsocket(proc, uap, retval) struct proc *proc; struct ATsocket_args *uap; @@ -78,13 +77,13 @@ int ATsocket(proc, uap, retval) { int err; atalk_lock(); - if (sys_ATsocket) { + if (_ATsocket) { /* required check for all AppleTalk system calls */ if (!(at_state.flags & AT_ST_STARTED) || !ifID_home) { *retval = -1; err = ENOTREADY; } else { - *retval = (*sys_ATsocket)(uap->proto, &err, proc); + *retval = _ATsocket((int)uap->proto, (int *)&err, (void *)proc); } } else { *retval = -1; @@ -102,14 +101,14 @@ int ATgetmsg(proc, uap, retval) int err; atalk_lock(); - if (sys_ATgetmsg) { + if (_ATgetmsg) { /* required check for all AppleTalk system calls */ if (!(at_state.flags & AT_ST_STARTED) || !ifID_home) { *retval = -1; err = ENOTREADY; } else { *retval = - (*sys_ATgetmsg)(uap->fd, uap->ctlptr, uap->datptr, + (*_ATgetmsg)(uap->fd, uap->ctlptr, uap->datptr, uap->flags, &err, proc); } } else { @@ -128,14 +127,14 @@ int ATputmsg(proc, uap, retval) int err; atalk_lock(); - if (sys_ATputmsg) { + if (_ATputmsg) { /* required check for all AppleTalk system calls */ if (!(at_state.flags & AT_ST_STARTED) || !ifID_home) { *retval = -1; err = ENOTREADY; } else { *retval = - 
(*sys_ATputmsg)(uap->fd, uap->ctlptr, uap->datptr, + _ATputmsg(uap->fd, uap->ctlptr, uap->datptr, uap->flags, &err, proc); } } else { @@ -154,14 +153,14 @@ int ATPsndreq(proc, uap, retval) int err; atalk_lock(); - if (sys_ATPsndreq) { + if (_ATPsndreq) { /* required check for all AppleTalk system calls */ if (!(at_state.flags & AT_ST_STARTED) || !ifID_home) { *retval = -1; err = ENOTREADY; } else { *retval = - (*sys_ATPsndreq)(uap->fd, uap->buf, uap->len, + _ATPsndreq(uap->fd, uap->buf, uap->len, uap->nowait, &err, proc); } } else { @@ -180,14 +179,14 @@ int ATPsndrsp(proc, uap, retval) int err; atalk_lock(); - if (sys_ATPsndrsp) { + if (_ATPsndrsp) { /* required check for all AppleTalk system calls */ if (!(at_state.flags & AT_ST_STARTED) || !ifID_home) { *retval = -1; err = ENOTREADY; } else { *retval = - (*sys_ATPsndrsp)(uap->fd, uap->respbuff, + _ATPsndrsp(uap->fd, uap->respbuff, uap->resplen, uap->datalen, &err, proc); } } else { @@ -206,14 +205,14 @@ int ATPgetreq(proc, uap, retval) int err; atalk_lock(); - if (sys_ATPgetreq) { + if (_ATPgetreq) { /* required check for all AppleTalk system calls */ if (!(at_state.flags & AT_ST_STARTED) || !ifID_home) { *retval = -1; err = ENOTREADY; } else { *retval = - (*sys_ATPgetreq)(uap->fd, uap->buf, uap->buflen, + _ATPgetreq(uap->fd, uap->buf, uap->buflen, &err, proc); } } else { @@ -232,14 +231,14 @@ int ATPgetrsp(proc, uap, retval) int err = 0; atalk_lock(); - if (sys_ATPgetrsp) { + if (_ATPgetrsp) { /* required check for all AppleTalk system calls */ if (!(at_state.flags & AT_ST_STARTED) || !ifID_home) { *retval = -1; err = ENOTREADY; } else { *retval = - (*sys_ATPgetrsp)(uap->fd, uap->bdsp, &err, proc); + _ATPgetrsp(uap->fd, uap->bdsp, &err, proc); } } else { *retval = -1; diff --git a/bsd/netat/sys_glue.c b/bsd/netat/sys_glue.c index ab185b257..ce23f102a 100644 --- a/bsd/netat/sys_glue.c +++ b/bsd/netat/sys_glue.c @@ -91,7 +91,6 @@ SYSCTL_STRUCT(_net_appletalk, OID_AUTO, ddpstats, CTLFLAG_RD, static void 
ioccmd_t_32_to_64( ioccmd_t *from_p, user_ioccmd_t *to_p ); static void ioccmd_t_64_to_32( user_ioccmd_t *from_p, ioccmd_t *to_p ); -atlock_t refall_lock; caddr_t atp_free_cluster_list = 0; @@ -286,7 +285,7 @@ int _ATrw(fp, rw, uio, p) struct uio *uio; struct proc *p; { - int s, err, len, clen = 0, res; + int err, len, clen = 0, res; gref_t *gref; gbuf_t *m, *mhead, *mprev; @@ -298,7 +297,6 @@ int _ATrw(fp, rw, uio, p) if ((len = uio_resid(uio)) == 0) return 0; - ATDISABLE(s, gref->lock); if (rw == UIO_READ) { KERNEL_DEBUG(DBG_ADSP_ATRW, 0, gref, len, gref->rdhead, 0); @@ -306,23 +304,18 @@ int _ATrw(fp, rw, uio, p) gref->sevents |= POLLMSG; err = msleep(&gref->event, atalk_mutex, PSOCK | PCATCH, "AT read", 0); gref->sevents &= ~POLLMSG; - if (err != 0) { - ATENABLE(s, gref->lock); + if (err != 0) return err; - } KERNEL_DEBUG(DBG_ADSP_ATRW, 1, gref, gref->rdhead, mhead, gbuf_next(mhead)); } - if (gref->errno) { - ATENABLE(s, gref->lock); + if (gref->errno) return EPIPE; - } if ((gref->rdhead = gbuf_next(mhead)) == 0) gref->rdtail = 0; KERNEL_DEBUG(DBG_ADSP_ATRW, 2, gref, gref->rdhead, mhead, gbuf_next(mhead)); - ATENABLE(s, gref->lock); //##### LD TEST 08/05 // simple_lock(&gref->lock); @@ -354,12 +347,10 @@ int _ATrw(fp, rw, uio, p) gbuf_cont(mprev) = 0; else mhead = 0; - ATDISABLE(s, gref->lock); if (gref->rdhead == 0) gref->rdtail = m; gbuf_next(m) = gref->rdhead; gref->rdhead = m; - ATENABLE(s, gref->lock); } if (mhead) gbuf_freem(mhead); @@ -372,14 +363,11 @@ int _ATrw(fp, rw, uio, p) gref->sevents |= POLLSYNC; err = msleep(&gref->event, atalk_mutex, PSOCK | PCATCH, "AT write", 0); gref->sevents &= ~POLLSYNC; - if (err != 0) { - ATENABLE(s, gref->lock); + if (err != 0) return err; - } } } - ATENABLE(s, gref->lock); /* allocate a buffer to copy in the write data */ if ((m = gbuf_alloc(AT_WR_OFFSET+len, PRI_MED)) == 0) @@ -440,7 +428,7 @@ int _ATwrite(fp, uio, cred, flags, p) /* bms: update to be callable from kernel */ int at_ioctl(gref_t *gref, u_long cmd, 
caddr_t arg, int fromKernel) { - int s, err = 0, len; + int err = 0, len; u_int size; gbuf_t *m, *mdata; ioc_t *ioc; @@ -520,7 +508,6 @@ int at_ioctl(gref_t *gref, u_long cmd, caddr_t arg, int fromKernel) gref_wput(gref, m); /* wait for the ioc ack */ - ATDISABLE(s, gref->lock); while ((m = gref->ichead) == 0) { gref->sevents |= POLLPRI; #ifdef APPLETALK_DEBUG @@ -529,7 +516,6 @@ int at_ioctl(gref_t *gref, u_long cmd, caddr_t arg, int fromKernel) err = msleep(&gref->iocevent, atalk_mutex, PSOCK | PCATCH, "AT ioctl", 0); gref->sevents &= ~POLLPRI; if (err != 0) { - ATENABLE(s, gref->lock); #ifdef APPLETALK_DEBUG kprintf("at_ioctl: EINTR\n"); #endif @@ -543,7 +529,6 @@ int at_ioctl(gref_t *gref, u_long cmd, caddr_t arg, int fromKernel) gref->ichead = gbuf_next(m); - ATENABLE(s, gref->lock); #ifdef APPLETALK_DEBUG kprintf("at_ioctl: woke up from ioc sleep gref = 0x%x\n", @@ -630,7 +615,7 @@ int _ATselect(fp, which, wql, proc) void * wql; struct proc *proc; { - int s, err, rc = 0; + int err, rc = 0; gref_t *gref; /* Radar 4128949: Drop the proc_fd lock here to avoid lock inversion issues with the other AT calls @@ -648,7 +633,6 @@ int _ATselect(fp, which, wql, proc) if (err != 0) rc = 1; else { - ATDISABLE(s, gref->lock); if (which == FREAD) { if (gref->rdhead || (gref->readable && (*gref->readable)(gref))) rc = 1; @@ -668,7 +652,6 @@ int _ATselect(fp, which, wql, proc) } else rc = 1; } - ATENABLE(s, gref->lock); } return rc; @@ -686,9 +669,7 @@ void atalk_putnext(gref, m) gref_t *gref; gbuf_t *m; { - int s; - ATDISABLE(s, gref->lock); /* *** potential leak? 
*** */ gbuf_next(m) = 0; @@ -733,7 +714,6 @@ void atalk_putnext(gref, m) } } /* switch gbuf_type(m) */ - ATENABLE(s, gref->lock); } /* atalk_putnext */ void atalk_enablew(gref) @@ -746,9 +726,7 @@ void atalk_enablew(gref) void atalk_flush(gref) gref_t *gref; { - int s; - ATDISABLE(s, gref->lock); if (gref->rdhead) { gbuf_freel(gref->rdhead); gref->rdhead = 0; @@ -757,7 +735,6 @@ void atalk_flush(gref) gbuf_freel(gref->ichead); gref->ichead = 0; } - ATENABLE(s, gref->lock); } /* @@ -768,8 +745,6 @@ void atalk_notify(gref, errno) register gref_t *gref; int errno; { - int s; - ATDISABLE(s, gref->lock); if (gref->atpcb_socket) { /* For DDP -- @@ -800,35 +775,29 @@ void atalk_notify(gref, errno) } } } - ATENABLE(s, gref->lock); } /* atalk_notify */ void atalk_notify_sel(gref) gref_t *gref; { - int s; - ATDISABLE(s, gref->lock); if (gref->sevents & POLLIN) { gref->sevents &= ~POLLIN; selwakeup(&gref->si); } - ATENABLE(s, gref->lock); } int atalk_peek(gref, event) gref_t *gref; unsigned char *event; { - int s, rc; + int rc; - ATDISABLE(s, gref->lock); if (gref->rdhead) { *event = *gbuf_rptr(gref->rdhead); rc = 0; } else rc = -1; - ATENABLE(s, gref->lock); return rc; } @@ -874,15 +843,13 @@ int gref_alloc(grefp) gref_t **grefp; { extern gbuf_t *atp_resource_m; - int i, s; + int i; gbuf_t *m; gref_t *gref, *gref_array; *grefp = (gref_t *)NULL; - ATDISABLE(s, refall_lock); if (gref_free_list == 0) { - ATENABLE(s, refall_lock); #ifdef APPLETALK_DEBUG kprintf("gref_alloc: gbufalloc size=%d\n", GREF_PER_BLK*sizeof(gref_t)); #endif @@ -892,7 +859,6 @@ int gref_alloc(grefp) gref_array = (gref_t *)gbuf_rptr(m); for (i=0; i < GREF_PER_BLK-1; i++) gref_array[i].atpcb_next = (gref_t *)&gref_array[i+1]; - ATDISABLE(s, refall_lock); gbuf_cont(m) = atp_resource_m; atp_resource_m = m; gref_array[i].atpcb_next = gref_free_list; @@ -901,10 +867,6 @@ int gref_alloc(grefp) gref = gref_free_list; gref_free_list = gref->atpcb_next; - ATENABLE(s, refall_lock); - ATLOCKINIT(gref->lock); -//### 
LD Test 08/05/98 -// simple_lock_init(&gref->lock); ATEVENTINIT(gref->event); ATEVENTINIT(gref->iocevent); @@ -918,7 +880,7 @@ int gref_alloc(grefp) /* bms: make gref_close callable from kernel */ int gref_close(gref_t *gref) { - int s, rc; + int rc; switch (gref->proto) { @@ -946,11 +908,9 @@ int gref_close(gref_t *gref) selthreadclear(&gref->si); /* from original gref_free() */ - ATDISABLE(s, refall_lock); bzero((char *)gref, sizeof(gref_t)); gref->atpcb_next = gref_free_list; gref_free_list = gref; - ATENABLE(s, refall_lock); } return rc; @@ -987,7 +947,7 @@ struct mbuf *m_clattach(extbuf, extfree, extsize, extarg, wait) m->m_ext.ext_size = extsize; m->m_ext.ext_arg = extarg; m->m_ext.ext_refs.forward = - m->m_ext.ext_refs.backward = &m->m_ext.ext_refs; + m->m_ext.ext_refs.backward = &m->m_ext.ext_refs; m->m_data = extbuf; m->m_flags |= M_EXT; diff --git a/bsd/netat/sysglue.h b/bsd/netat/sysglue.h index 6aa9e37c6..45ceec8fd 100644 --- a/bsd/netat/sysglue.h +++ b/bsd/netat/sysglue.h @@ -58,21 +58,13 @@ typedef struct { * grow when we're dealing with a 64-bit process. 
* WARNING - keep in sync with ioccmd_t */ -#if __DARWIN_ALIGN_NATURAL -#pragma options align=natural -#endif - typedef struct { int ic_cmd; int ic_timout; int ic_len; - user_addr_t ic_dp; + user_addr_t ic_dp __attribute__((aligned(8))); } user_ioccmd_t; -#if __DARWIN_ALIGN_NATURAL -#pragma options align=reset -#endif - #endif // KERNEL_PRIVATE #endif // KERNEL @@ -158,10 +150,6 @@ typedef struct { typedef int atevent_t; typedef int atlock_t; -typedef int *atomic_p; -#define ATLOCKINIT(a) (a = (atlock_t) EVENT_NULL) -#define ATDISABLE(l, a) -#define ATENABLE(l, a) #define ATEVENTINIT(a) (a = (atevent_t) EVENT_NULL) #define DDP_OUTPUT(m) ddp_putmsg(0,m) #define StaticProc static diff --git a/bsd/netinet/icmp_var.h b/bsd/netinet/icmp_var.h index ec3a8ef5c..948997e69 100644 --- a/bsd/netinet/icmp_var.h +++ b/bsd/netinet/icmp_var.h @@ -110,4 +110,4 @@ extern int badport_bandlim(int); #define BANDLIM_RST_OPENPORT 4 /* No connection, listener */ #define BANDLIM_MAX 4 #endif KERNEL_PRIVATE -#endif _NETINET_ICMP_VAR_H_ +#endif /* _NETINET_ICMP_VAR_H_ */ diff --git a/bsd/netinet/if_ether.h b/bsd/netinet/if_ether.h index fa65cfe8b..79440febd 100644 --- a/bsd/netinet/if_ether.h +++ b/bsd/netinet/if_ether.h @@ -144,4 +144,4 @@ int arpresolve(struct ifnet *, struct rtentry *, struct mbuf *, void arp_ifinit(struct ifnet *, struct ifaddr *); #endif KERNEL_PRIVATE -#endif _NETINET_IF_ETHER_H_ +#endif /* _NETINET_IF_ETHER_H_ */ diff --git a/bsd/netinet/in.h b/bsd/netinet/in.h index 695e07bff..868bcb95a 100644 --- a/bsd/netinet/in.h +++ b/bsd/netinet/in.h @@ -595,4 +595,4 @@ u_long in_netof(struct in_addr); const char *inet_ntop(int, const void *, char *, size_t); /* in libkern */ #endif /* KERNEL */ -#endif _NETINET_IN_H_ +#endif /* _NETINET_IN_H_ */ diff --git a/bsd/netinet/ip.h b/bsd/netinet/ip.h index 7b75ffb2c..e134cb929 100644 --- a/bsd/netinet/ip.h +++ b/bsd/netinet/ip.h @@ -58,7 +58,7 @@ #ifndef _NETINET_IP_H_ #define _NETINET_IP_H_ #include -#include > /* XXX temporary hack 
to get u_ types */ +#include /* XXX temporary hack to get u_ types */ #include #include diff --git a/bsd/netinet/ip_divert.c b/bsd/netinet/ip_divert.c index 5d4ffb3f2..e3da4e2f9 100644 --- a/bsd/netinet/ip_divert.c +++ b/bsd/netinet/ip_divert.c @@ -164,7 +164,6 @@ div_init(void) * allocate the lock attribute for divert pcb mutexes */ pcbinfo->mtx_attr = lck_attr_alloc_init(); - lck_attr_setdefault(pcbinfo->mtx_attr); if ((pcbinfo->mtx = lck_rw_alloc_init(pcbinfo->mtx_grp, pcbinfo->mtx_attr)) == NULL) return; /* pretty much dead if this fails... */ @@ -634,12 +633,9 @@ __private_extern__ int div_lock(struct socket *so, int refcount, int lr) { int lr_saved; -#ifdef __ppc__ - if (lr == 0) { - __asm__ volatile("mflr %0" : "=r" (lr_saved)); - } + if (lr == 0) + lr_saved = (unsigned int) __builtin_return_address(0); else lr_saved = lr; -#endif #ifdef MORE_DICVLOCK_DEBUG printf("div_lock: so=%x sopcb=%x lock=%x ref=%x lr=%x\n", @@ -662,7 +658,8 @@ div_lock(struct socket *so, int refcount, int lr) if (refcount) so->so_usecount++; - so->reserved3 = (void *)lr_saved; + so->lock_lr[so->next_lock_lr] = (u_int32_t *)lr_saved; + so->next_lock_lr = (so->next_lock_lr+1) % SO_LCKDBG_MAX; return (0); } @@ -673,12 +670,11 @@ div_unlock(struct socket *so, int refcount, int lr) int lr_saved; lck_mtx_t * mutex_held; struct inpcb *inp = sotoinpcb(so); -#ifdef __ppc__ - if (lr == 0) { - __asm__ volatile("mflr %0" : "=r" (lr_saved)); - } + + if (lr == 0) + lr_saved = (unsigned int) __builtin_return_address(0); else lr_saved = lr; -#endif + #ifdef MORE_DICVLOCK_DEBUG printf("div_unlock: so=%x sopcb=%x lock=%x ref=%x lr=%x\n", @@ -707,8 +703,9 @@ div_unlock(struct socket *so, int refcount, int lr) return (0); } lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED); + so->unlock_lr[so->next_unlock_lr] = (u_int *)lr_saved; + so->next_unlock_lr = (so->next_unlock_lr+1) % SO_LCKDBG_MAX; lck_mtx_unlock(mutex_held); - so->reserved4 = (void *)lr_saved; return (0); } diff --git 
a/bsd/netinet/ip_dummynet.c b/bsd/netinet/ip_dummynet.c index 2b815370e..6b599e42c 100644 --- a/bsd/netinet/ip_dummynet.c +++ b/bsd/netinet/ip_dummynet.c @@ -105,6 +105,10 @@ */ static dn_key curr_time = 0 ; /* current simulation time */ +/* this is for the timer that fires to call dummynet() - we only enable the timer when + there are packets to process, otherwise it's disabled */ +static int timer_enabled = 0; + static int dn_hash_size = 64 ; /* default hash size */ /* statistics on number of queue searches and search steps */ @@ -469,9 +473,6 @@ transmit_event(struct dn_pipe *pipe) break ; } case DN_TO_IP_IN : - ip = mtod(m, struct ip *); - ip->ip_len = htons(ip->ip_len); - ip->ip_off = htons(ip->ip_off); proto_inject(PF_INET, m); break ; @@ -519,8 +520,15 @@ transmit_event(struct dn_pipe *pipe) * before being able to transmit a packet. The credit is taken from * either a pipe (WF2Q) or a flow_queue (per-flow queueing) */ + +/* hz is 100, which gives a granularity of 10ms in the old timer. + * The timer has been changed to fire every 1ms, so the use of + * hz has been modified here. All instances of hz have been left + * in place but adjusted by a factor of 10 so that hz is functionally + * equal to 1000. + */ #define SET_TICKS(_m, q, p) \ - ((_m)->m_pkthdr.len*8*hz - (q)->numbytes + p->bandwidth - 1 ) / \ + ((_m)->m_pkthdr.len*8*(hz*10) - (q)->numbytes + p->bandwidth - 1 ) / \ p->bandwidth ; /* @@ -580,7 +588,7 @@ ready_event(struct dn_flow_queue *q) q->numbytes += ( curr_time - q->sched_time ) * p->bandwidth; while ( (pkt = q->head) != NULL ) { int len = pkt->m_pkthdr.len; - int len_scaled = p->bandwidth ? len*8*hz : 0 ; + int len_scaled = p->bandwidth ? len*8*(hz*10) : 0 ; if (len_scaled > q->numbytes ) break ; q->numbytes -= len_scaled ; @@ -650,7 +658,7 @@ ready_event_wfq(struct dn_pipe *p) struct mbuf *pkt = q->head; struct dn_flow_set *fs = q->fs; u_int64_t len = pkt->m_pkthdr.len; - int len_scaled = p->bandwidth ? 
len*8*hz : 0 ; + int len_scaled = p->bandwidth ? len*8*(hz*10) : 0 ; heap_extract(sch, NULL); /* remove queue from heap */ p->numbytes -= len_scaled ; @@ -737,7 +745,7 @@ ready_event_wfq(struct dn_pipe *p) } /* - * This is called once per tick, or HZ times per second. It is used to + * This is called every 1ms. It is used to * increment the current tick counter and schedule expired events. */ static void @@ -748,6 +756,8 @@ dummynet(void * __unused unused) struct dn_heap *heaps[3]; int i; struct dn_pipe *pe ; + struct timespec ts; + struct timeval tv; heaps[0] = &ready_heap ; /* fixed-rate queues */ heaps[1] = &wfq_ready_heap ; /* wfq queues */ @@ -755,25 +765,31 @@ dummynet(void * __unused unused) lck_mtx_lock(dn_mutex); - curr_time++ ; + /* make all time measurements in milliseconds (ms) - + * here we convert secs and usecs to msecs (just divide the + * usecs and take the closest whole number). + */ + microuptime(&tv); + curr_time = (tv.tv_sec * 1000) + (tv.tv_usec / 1000); + for (i=0; i < 3 ; i++) { h = heaps[i]; while (h->elements > 0 && DN_KEY_LEQ(h->p[0].key, curr_time) ) { - if (h->p[0].key > curr_time) + if (h->p[0].key > curr_time) printf("dummynet: warning, heap %d is %d ticks late\n", - i, (int)(curr_time - h->p[0].key)); - p = h->p[0].object ; /* store a copy before heap_extract */ - heap_extract(h, NULL); /* need to extract before processing */ - if (i == 0) + i, (int)(curr_time - h->p[0].key)); + p = h->p[0].object ; /* store a copy before heap_extract */ + heap_extract(h, NULL); /* need to extract before processing */ + if (i == 0) ready_event(p) ; - else if (i == 1) { + else if (i == 1) { struct dn_pipe *pipe = p; if (pipe->if_name[0] != '\0') - printf("dummynet: bad ready_event_wfq for pipe %s\n", + printf("dummynet: bad ready_event_wfq for pipe %s\n", pipe->if_name); else - ready_event_wfq(p) ; - } else + ready_event_wfq(p) ; + } else transmit_event(p); } } @@ -788,9 +804,22 @@ dummynet(void * __unused unused) pe->sum -= q->fs->weight ; } - 
lck_mtx_unlock(dn_mutex); + /* check the heaps to see if there's still stuff in there, and + * only set the timer if there are packets to process + */ + timer_enabled = 0; + for (i=0; i < 3 ; i++) { + h = heaps[i]; + if (h->elements > 0) { // set the timer + ts.tv_sec = 0; + ts.tv_nsec = 1 * 1000000; // 1ms + timer_enabled = 1; + bsd_timeout(dummynet, NULL, &ts); + break; + } + } - timeout(dummynet, NULL, 1); + lck_mtx_unlock(dn_mutex); } /* @@ -1121,6 +1150,8 @@ dummynet_io(struct mbuf *m, int pipe_nr, int dir, struct ip_fw_args *fwa) u_int64_t len = m->m_pkthdr.len ; struct dn_flow_queue *q = NULL ; int is_pipe; + struct timespec ts; + struct timeval tv; #if IPFW2 ipfw_insn *cmd = fwa->rule->cmd + fwa->rule->act_ofs; @@ -1135,6 +1166,13 @@ dummynet_io(struct mbuf *m, int pipe_nr, int dir, struct ip_fw_args *fwa) pipe_nr &= 0xffff ; lck_mtx_lock(dn_mutex); + + /* make all time measurements in milliseconds (ms) - + * here we convert secs and usecs to msecs (just divide the + * usecs and take the closest whole number). + */ + microuptime(&tv); + curr_time = (tv.tv_sec * 1000) + (tv.tv_usec / 1000); /* * This is a dummynet rule, so we expect an O_PIPE or O_QUEUE rule. @@ -1287,6 +1325,14 @@ dummynet_io(struct mbuf *m, int pipe_nr, int dir, struct ip_fw_args *fwa) } } done: + /* start the timer and set global if not already set */ + if (!timer_enabled) { + ts.tv_sec = 0; + ts.tv_nsec = 1 * 1000000; // 1ms + timer_enabled = 1; + bsd_timeout(dummynet, NULL, &ts); + } + lck_mtx_unlock(dn_mutex); return 0; @@ -1586,7 +1632,7 @@ config_pipe(struct dn_pipe *p) * delay = ms, must be translated into ticks. * qsize = slots/bytes */ - p->delay = ( p->delay * hz ) / 1000 ; + p->delay = ( p->delay * (hz*10) ) / 1000 ; /* We need either a pipe number or a flow_set number */ if (p->pipe_nr == 0 && pfs->fs_nr == 0) return EINVAL ; @@ -1938,7 +1984,7 @@ dummynet_get(struct sockopt *sopt) * After each flow_set, copy the queue descriptor it owns. 
*/ bcopy(p, bp, sizeof( *p ) ); - pipe_bp->delay = (pipe_bp->delay * 1000) / hz ; + pipe_bp->delay = (pipe_bp->delay * 1000) / (hz*10) ; /* * XXX the following is a hack based on ->next being the * first field in dn_pipe and dn_flow_set. The correct @@ -2025,7 +2071,6 @@ ip_dn_init(void) dn_mutex_grp_attr = lck_grp_attr_alloc_init(); dn_mutex_grp = lck_grp_alloc_init("dn", dn_mutex_grp_attr); dn_mutex_attr = lck_attr_alloc_init(); - lck_attr_setdefault(dn_mutex_attr); if ((dn_mutex = lck_mtx_alloc_init(dn_mutex_grp, dn_mutex_attr)) == NULL) { printf("ip_dn_init: can't alloc dn_mutex\n"); @@ -2045,6 +2090,4 @@ ip_dn_init(void) ip_dn_ctl_ptr = ip_dn_ctl; ip_dn_io_ptr = dummynet_io; ip_dn_ruledel_ptr = dn_rule_delete; - - timeout(dummynet, NULL, 1); } diff --git a/bsd/netinet/ip_fw.h b/bsd/netinet/ip_fw.h index 3f19ae79f..606c62844 100644 --- a/bsd/netinet/ip_fw.h +++ b/bsd/netinet/ip_fw.h @@ -316,4 +316,4 @@ extern struct ipfw_flow_id last_pkt ; #endif KERNEL_PRIVATE #endif !IPFW2 -#endif _IP_FW_H +#endif /* _IP_FW_H */ diff --git a/bsd/netinet/ip_fw2.c b/bsd/netinet/ip_fw2.c index 9801fb7d3..84f8f0c1a 100644 --- a/bsd/netinet/ip_fw2.c +++ b/bsd/netinet/ip_fw2.c @@ -2942,8 +2942,10 @@ ipfw_ctl(struct sockopt *sopt) int i, len = 0; struct ip_old_fw *buf2, *rule_vers0; + lck_mtx_lock(ipfw_mutex); buf2 = _MALLOC(static_count * sizeof(struct ip_old_fw), M_TEMP, M_WAITOK); if (buf2 == 0) { + lck_mtx_unlock(ipfw_mutex); error = ENOBUFS; } @@ -2959,6 +2961,7 @@ ipfw_ctl(struct sockopt *sopt) len += sizeof(*rule_vers0); rule_vers0++; } + lck_mtx_unlock(ipfw_mutex); error = sooptcopyout(sopt, buf2, len); _FREE(buf2, M_TEMP); } @@ -2968,11 +2971,13 @@ ipfw_ctl(struct sockopt *sopt) struct ipfw_dyn_rule_compat *dyn_rule_vers1, *dyn_last = NULL; ipfw_dyn_rule *p; + lck_mtx_lock(ipfw_mutex); buf_size = static_count * sizeof(struct ip_fw_compat) + dyn_count * sizeof(struct ipfw_dyn_rule_compat); buf2 = _MALLOC(buf_size, M_TEMP, M_WAITOK); if (buf2 == 0) { + 
lck_mtx_unlock(ipfw_mutex); error = ENOBUFS; } @@ -3017,6 +3022,7 @@ ipfw_ctl(struct sockopt *sopt) dyn_last->next = NULL; } } + lck_mtx_unlock(ipfw_mutex); error = sooptcopyout(sopt, buf2, len); _FREE(buf2, M_TEMP); @@ -3263,7 +3269,6 @@ ipfw_init(void) ipfw_mutex_grp_attr = lck_grp_attr_alloc_init(); ipfw_mutex_grp = lck_grp_alloc_init("ipfw", ipfw_mutex_grp_attr); ipfw_mutex_attr = lck_attr_alloc_init(); - lck_attr_setdefault(ipfw_mutex_attr); if ((ipfw_mutex = lck_mtx_alloc_init(ipfw_mutex_grp, ipfw_mutex_attr)) == NULL) { printf("ipfw_init: can't alloc ipfw_mutex\n"); diff --git a/bsd/netinet/ip_input.c b/bsd/netinet/ip_input.c index f567de39e..b94ebd468 100644 --- a/bsd/netinet/ip_input.c +++ b/bsd/netinet/ip_input.c @@ -348,14 +348,11 @@ ip_init() ipf_init(); ip_mutex_grp_attr = lck_grp_attr_alloc_init(); - lck_grp_attr_setdefault(ip_mutex_grp_attr); ip_mutex_grp = lck_grp_alloc_init("ip", ip_mutex_grp_attr); ip_mutex_attr = lck_attr_alloc_init(); - lck_attr_setdefault(ip_mutex_attr); - if ((ip_mutex = lck_mtx_alloc_init(ip_mutex_grp, ip_mutex_attr)) == NULL) { printf("ip_init: can't alloc ip_mutex\n"); return; diff --git a/bsd/netinet/ip_mroute.h b/bsd/netinet/ip_mroute.h index c9e42d75d..3fef18725 100644 --- a/bsd/netinet/ip_mroute.h +++ b/bsd/netinet/ip_mroute.h @@ -290,4 +290,4 @@ extern int (*mrt_ioctl)(int, caddr_t, struct proc *); #endif #endif KERNEL_PRIVATE -#endif _NETINET_IP_MROUTE_H_ +#endif /* _NETINET_IP_MROUTE_H_ */ diff --git a/bsd/netinet/ip_output.c b/bsd/netinet/ip_output.c index 44f89c3cf..fc9f8fa0e 100644 --- a/bsd/netinet/ip_output.c +++ b/bsd/netinet/ip_output.c @@ -476,6 +476,11 @@ ip_output_list( lck_mtx_unlock(ip_mutex); ipf_ref(); + + /* 4135317 - always pass network byte order to filter */ + HTONS(ip->ip_len); + HTONS(ip->ip_off); + TAILQ_FOREACH(filter, &ipv4_filters, ipf_link) { if (seen == 0) { if ((struct ipfilter *)inject_filter_ref == filter) @@ -494,6 +499,11 @@ ip_output_list( } } } + + /* set back to host byte order */ + 
NTOHS(ip->ip_len); + NTOHS(ip->ip_off); + lck_mtx_lock(ip_mutex); ipf_unref(); didfilter = 1; @@ -607,6 +617,11 @@ ip_output_list( lck_mtx_unlock(ip_mutex); ipf_ref(); + + /* 4135317 - always pass network byte order to filter */ + HTONS(ip->ip_len); + HTONS(ip->ip_off); + TAILQ_FOREACH(filter, &ipv4_filters, ipf_link) { if (seen == 0) { if ((struct ipfilter *)inject_filter_ref == filter) @@ -625,6 +640,11 @@ ip_output_list( } } } + + /* set back to host byte order */ + NTOHS(ip->ip_len); + NTOHS(ip->ip_off); + ipf_unref(); lck_mtx_lock(ip_mutex); } @@ -802,6 +822,11 @@ ip_output_list( lck_mtx_unlock(ip_mutex); ipf_ref(); + + /* 4135317 - always pass network byte order to filter */ + HTONS(ip->ip_len); + HTONS(ip->ip_off); + TAILQ_FOREACH(filter, &ipv4_filters, ipf_link) { if (filter->ipf_filter.ipf_output) { errno_t result; @@ -817,6 +842,11 @@ ip_output_list( } } } + + /* set back to host byte order */ + NTOHS(ip->ip_len); + NTOHS(ip->ip_off); + ipf_unref(); lck_mtx_lock(ip_mutex); } diff --git a/bsd/netinet/ip_var.h b/bsd/netinet/ip_var.h index ddba0e79f..ce5d0569f 100644 --- a/bsd/netinet/ip_var.h +++ b/bsd/netinet/ip_var.h @@ -236,5 +236,5 @@ void ip_rsvp_force_done(struct socket *); void in_delayed_cksum(struct mbuf *m); -#endif KERNEL_PRIVATE -#endif !_NETINET_IP_VAR_H_ +#endif /* KERNEL_PRIVATE */ +#endif /* !_NETINET_IP_VAR_H_ */ diff --git a/bsd/netinet/kpi_ipfilter.c b/bsd/netinet/kpi_ipfilter.c index 52b8a0f25..5750c6151 100644 --- a/bsd/netinet/kpi_ipfilter.c +++ b/bsd/netinet/kpi_ipfilter.c @@ -301,6 +301,11 @@ ipf_injectv4_out( m_freem(m); return ENETUNREACH; } + + /* Put ip_len and ip_off in host byte order, ip_output expects that */ + NTOHS(ip->ip_len); + NTOHS(ip->ip_off); + /* Send */ error = ip_output(m, NULL, &ro, IP_ALLOWBROADCAST | IP_RAWOUTPUT, imo); @@ -449,7 +454,6 @@ ipf_init(void) error = ENOMEM; goto done; } - lck_grp_attr_setdefault(grp_attributes); lck_grp = lck_grp_alloc_init("IP Filter", grp_attributes); if (lck_grp == 0) { @@ -464,7 
+468,6 @@ ipf_init(void) error = ENOMEM; goto done; } - lck_attr_setdefault(lck_attributes); kipf_lock = lck_mtx_alloc_init(lck_grp, lck_attributes); if (kipf_lock == 0) { diff --git a/bsd/netinet/raw_ip.c b/bsd/netinet/raw_ip.c index 5332bf708..4028d1291 100644 --- a/bsd/netinet/raw_ip.c +++ b/bsd/netinet/raw_ip.c @@ -144,7 +144,6 @@ rip_init() * allocate lock group attribute and group for udp pcb mutexes */ pcbinfo->mtx_grp_attr = lck_grp_attr_alloc_init(); - lck_grp_attr_setdefault(pcbinfo->mtx_grp_attr); pcbinfo->mtx_grp = lck_grp_alloc_init("ripcb", pcbinfo->mtx_grp_attr); @@ -152,7 +151,6 @@ rip_init() * allocate the lock attribute for udp pcb mutexes */ pcbinfo->mtx_attr = lck_attr_alloc_init(); - lck_attr_setdefault(pcbinfo->mtx_attr); if ((pcbinfo->mtx = lck_rw_alloc_init(pcbinfo->mtx_grp, pcbinfo->mtx_attr)) == NULL) return; /* pretty much dead if this fails... */ @@ -736,22 +734,25 @@ rip_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam, return rip_output(m, so, dst); } +/* note: rip_unlock is called from different protos instead of the generic socket_unlock, + * it will handle the socket dealloc on last reference + * */ int rip_unlock(struct socket *so, int refcount, int debug) { int lr_saved; struct inpcb *inp = sotoinpcb(so); -#ifdef __ppc__ - if (debug == 0) { - __asm__ volatile("mflr %0" : "=r" (lr_saved)); - } + + if (debug == 0) + lr_saved = (unsigned int) __builtin_return_address(0); else lr_saved = debug; -#endif + if (refcount) { if (so->so_usecount <= 0) panic("rip_unlock: bad refoucnt so=%x val=%x\n", so, so->so_usecount); so->so_usecount--; if (so->so_usecount == 0 && (inp->inp_wantcnt == WNT_STOPUSING)) { + /* cleanup after last reference */ lck_mtx_unlock(so->so_proto->pr_domain->dom_mtx); lck_rw_lock_exclusive(ripcbinfo.mtx); in_pcbdispose(inp); @@ -759,6 +760,8 @@ rip_unlock(struct socket *so, int refcount, int debug) return(0); } } + so->unlock_lr[so->next_unlock_lr] = (u_int *)lr_saved; + so->next_unlock_lr = 
(so->next_unlock_lr+1) % SO_LCKDBG_MAX; lck_mtx_unlock(so->so_proto->pr_domain->dom_mtx); return(0); } diff --git a/bsd/netinet/tcp_subr.c b/bsd/netinet/tcp_subr.c index 0e72a2325..f14ece337 100644 --- a/bsd/netinet/tcp_subr.c +++ b/bsd/netinet/tcp_subr.c @@ -332,14 +332,12 @@ tcp_init() * allocate lock group attribute and group for tcp pcb mutexes */ pcbinfo->mtx_grp_attr = lck_grp_attr_alloc_init(); - lck_grp_attr_setdefault(pcbinfo->mtx_grp_attr); pcbinfo->mtx_grp = lck_grp_alloc_init("tcppcb", pcbinfo->mtx_grp_attr); /* * allocate the lock attribute for tcp pcb mutexes */ pcbinfo->mtx_attr = lck_attr_alloc_init(); - lck_attr_setdefault(pcbinfo->mtx_attr); if ((pcbinfo->mtx = lck_rw_alloc_init(pcbinfo->mtx_grp, pcbinfo->mtx_attr)) == NULL) { printf("tcp_init: mutex not alloced!\n"); @@ -1716,12 +1714,9 @@ tcp_lock(so, refcount, lr) int lr; { int lr_saved; -#ifdef __ppc__ - if (lr == 0) { - __asm__ volatile("mflr %0" : "=r" (lr_saved)); - } + if (lr == 0) + lr_saved = (unsigned int) __builtin_return_address(0); else lr_saved = lr; -#endif if (so->so_pcb) { lck_mtx_lock(((struct inpcb *)so->so_pcb)->inpcb_mtx); @@ -1737,7 +1732,8 @@ tcp_lock(so, refcount, lr) if (refcount) so->so_usecount++; - so->reserved3 = (void *)lr_saved; + so->lock_lr[so->next_lock_lr] = (u_int32_t *)lr_saved; + so->next_lock_lr = (so->next_lock_lr+1) % SO_LCKDBG_MAX; return (0); } @@ -1748,12 +1744,9 @@ tcp_unlock(so, refcount, lr) int lr; { int lr_saved; -#ifdef __ppc__ - if (lr == 0) { - __asm__ volatile("mflr %0" : "=r" (lr_saved)); - } + if (lr == 0) + lr_saved = (unsigned int) __builtin_return_address(0); else lr_saved = lr; -#endif #ifdef MORE_TCPLOCK_DEBUG printf("tcp_unlock: so=%x sopcb=%x lock=%x ref=%x lr=%x\n", @@ -1764,15 +1757,14 @@ tcp_unlock(so, refcount, lr) if (so->so_usecount < 0) panic("tcp_unlock: so=%x usecount=%x\n", so, so->so_usecount); - if (so->so_pcb == NULL) { + if (so->so_pcb == NULL) panic("tcp_unlock: so=%x NO PCB usecount=%x lr=%x\n", so, so->so_usecount, 
lr_saved); - lck_mtx_unlock(so->so_proto->pr_domain->dom_mtx); - } else { lck_mtx_assert(((struct inpcb *)so->so_pcb)->inpcb_mtx, LCK_MTX_ASSERT_OWNED); + so->unlock_lr[so->next_unlock_lr] = (u_int *)lr_saved; + so->next_unlock_lr = (so->next_unlock_lr+1) % SO_LCKDBG_MAX; lck_mtx_unlock(((struct inpcb *)so->so_pcb)->inpcb_mtx); } - so->reserved4 = (void *)lr_saved; return (0); } diff --git a/bsd/netinet/udp_usrreq.c b/bsd/netinet/udp_usrreq.c index 3ecfaee90..dffbd3e9c 100644 --- a/bsd/netinet/udp_usrreq.c +++ b/bsd/netinet/udp_usrreq.c @@ -199,12 +199,10 @@ udp_init() * allocate lock group attribute and group for udp pcb mutexes */ pcbinfo->mtx_grp_attr = lck_grp_attr_alloc_init(); - lck_grp_attr_setdefault(pcbinfo->mtx_grp_attr); pcbinfo->mtx_grp = lck_grp_alloc_init("udppcb", pcbinfo->mtx_grp_attr); pcbinfo->mtx_attr = lck_attr_alloc_init(); - lck_attr_setdefault(pcbinfo->mtx_attr); if ((pcbinfo->mtx = lck_rw_alloc_init(pcbinfo->mtx_grp, pcbinfo->mtx_attr)) == NULL) return; /* pretty much dead if this fails... */ @@ -1222,27 +1220,22 @@ udp_lock(so, refcount, debug) int refcount, debug; { int lr_saved; -#ifdef __ppc__ - if (debug == 0) { - __asm__ volatile("mflr %0" : "=r" (lr_saved)); - } + if (debug == 0) + lr_saved = (unsigned int) __builtin_return_address(0); else lr_saved = debug; -#endif if (so->so_pcb) { lck_mtx_assert(((struct inpcb *)so->so_pcb)->inpcb_mtx, LCK_MTX_ASSERT_NOTOWNED); lck_mtx_lock(((struct inpcb *)so->so_pcb)->inpcb_mtx); } - else { + else panic("udp_lock: so=%x NO PCB! 
lr=%x\n", so, lr_saved); - lck_mtx_assert(so->so_proto->pr_domain->dom_mtx, LCK_MTX_ASSERT_NOTOWNED); - lck_mtx_lock(so->so_proto->pr_domain->dom_mtx); - } if (refcount) so->so_usecount++; - so->reserved3= lr_saved; + so->lock_lr[so->next_lock_lr] = (void *)lr_saved; + so->next_lock_lr = (so->next_lock_lr+1) % SO_LCKDBG_MAX; return (0); } @@ -1255,12 +1248,11 @@ udp_unlock(so, refcount, debug) int lr_saved; struct inpcb *inp = sotoinpcb(so); struct inpcbinfo *pcbinfo = &udbinfo; -#ifdef __ppc__ - if (debug == 0) { - __asm__ volatile("mflr %0" : "=r" (lr_saved)); - } + + if (debug == 0) + lr_saved = (unsigned int) __builtin_return_address(0); else lr_saved = debug; -#endif + if (refcount) { so->so_usecount--; #if 0 @@ -1273,18 +1265,16 @@ udp_unlock(so, refcount, debug) } #endif } - if (so->so_pcb == NULL) { + if (so->so_pcb == NULL) panic("udp_unlock: so=%x NO PCB! lr=%x\n", so, lr_saved); - lck_mtx_assert(so->so_proto->pr_domain->dom_mtx, LCK_MTX_ASSERT_OWNED); - lck_mtx_unlock(so->so_proto->pr_domain->dom_mtx); - } else { lck_mtx_assert(((struct inpcb *)so->so_pcb)->inpcb_mtx, LCK_MTX_ASSERT_OWNED); + so->unlock_lr[so->next_unlock_lr] = (void *)lr_saved; + so->next_unlock_lr = (so->next_unlock_lr+1) % SO_LCKDBG_MAX; lck_mtx_unlock(((struct inpcb *)so->so_pcb)->inpcb_mtx); } - so->reserved4 = lr_saved; return (0); } diff --git a/bsd/netinet/udp_var.h b/bsd/netinet/udp_var.h index 3a1c1873b..3de8e3add 100644 --- a/bsd/netinet/udp_var.h +++ b/bsd/netinet/udp_var.h @@ -145,5 +145,5 @@ lck_mtx_t * udp_getlock (struct socket *, int); void * udp_getlock (struct socket *, int); #endif -#endif KERNEL_PRIVATE -#endif _NETINET_UDP_VAR_H_ +#endif /* KERNEL_PRIVATE */ +#endif /* _NETINET_UDP_VAR_H_ */ diff --git a/bsd/netinet6/ah.h b/bsd/netinet6/ah.h index 3e7f8dcf8..e2a75ebd8 100644 --- a/bsd/netinet6/ah.h +++ b/bsd/netinet6/ah.h @@ -88,4 +88,4 @@ extern int ah4_calccksum(struct mbuf *, caddr_t, size_t, const struct ah_algorithm *, struct secasvar *); #endif KERNEL_PRIVATE 
-#endif _NETINET6_AH_H_ +#endif /* _NETINET6_AH_H_ */ diff --git a/bsd/netinet6/esp.h b/bsd/netinet6/esp.h index 1d9d0c0ba..6802e81a3 100644 --- a/bsd/netinet6/esp.h +++ b/bsd/netinet6/esp.h @@ -102,6 +102,6 @@ extern size_t esp_hdrsiz(struct ipsecrequest *); extern int esp_schedule(const struct esp_algorithm *, struct secasvar *); extern int esp_auth(struct mbuf *, size_t, size_t, struct secasvar *, u_char *); -#endif KERNEL_PRIVATE +#endif /* KERNEL_PRIVATE */ -#endif _NETINET6_ESP_H_ +#endif /* _NETINET6_ESP_H_ */ diff --git a/bsd/netinet6/in6.h b/bsd/netinet6/in6.h index 47f18fed4..989ea25a4 100644 --- a/bsd/netinet6/in6.h +++ b/bsd/netinet6/in6.h @@ -182,7 +182,8 @@ extern const struct in6_addr in6mask32; extern const struct in6_addr in6mask64; extern const struct in6_addr in6mask96; extern const struct in6_addr in6mask128; -#endif KERNEL_PRIVATE +#endif /* KERNEL_PRIVATE */ + /* * Macros started with IPV6_ADDR is KAME local @@ -413,7 +414,7 @@ struct route_in6 { #define IPV6_RECVRETOPTS 6 /* bool; receive IP6 opts for response */ #define IPV6_RECVDSTADDR 7 /* bool; receive IP6 dst addr w/dgram */ #define IPV6_RETOPTS 8 /* ip6_opts; set/get IP6 options */ -#endif 0 +#endif /* 0 */ #define IPV6_SOCKOPT_RESERVED1 3 /* reserved for future use */ #endif /* _POSIX_C_SOURCE */ #define IPV6_UNICAST_HOPS 4 /* int; IP6 hops */ @@ -441,12 +442,12 @@ struct route_in6 { #ifndef _POSIX_C_SOURCE #ifndef KERNEL #define IPV6_BINDV6ONLY IPV6_V6ONLY -#endif KERNEL +#endif /* KERNEL */ #if 1 /*IPSEC*/ #define IPV6_IPSEC_POLICY 28 /* struct; get/set security policy */ -#endif 1 +#endif /* 1 */ #define IPV6_FAITH 29 /* bool; accept FAITH'ed connections */ #if 1 /*IPV6FIREWALL*/ @@ -455,7 +456,7 @@ struct route_in6 { #define IPV6_FW_FLUSH 32 /* flush firewall rule chain */ #define IPV6_FW_ZERO 33 /* clear single/all firewall counter(s) */ #define IPV6_FW_GET 34 /* get entire firewall rule chain */ -#endif 1 +#endif /* 1 */ /* to define items, should talk with KAME guys first, for 
*BSD compatibility */ @@ -626,7 +627,7 @@ void in6_sin_2_v4mapsin6_in_sock(struct sockaddr **nam); #define satosin6(sa) ((struct sockaddr_in6 *)(sa)) #define sin6tosa(sin6) ((struct sockaddr *)(sin6)) #define ifatoia6(ifa) ((struct in6_ifaddr *)(ifa)) -#endif KERNEL_PRIVATE +#endif /* KERNEL_PRIVATE */ #ifndef KERNEL __BEGIN_DECLS @@ -670,6 +671,6 @@ extern int inet6_rth_reverse(const void *, void *); extern int inet6_rth_segments(const void *); extern struct in6_addr *inet6_rth_getaddr(const void *, int); __END_DECLS -#endif !KERNEL +#endif /* !KERNEL */ #endif /* _POSIX_C_SOURCE */ #endif /* !_NETINET6_IN6_H_ */ diff --git a/bsd/netinet6/in6_var.h b/bsd/netinet6/in6_var.h index 787393d33..0ef5ef924 100644 --- a/bsd/netinet6/in6_var.h +++ b/bsd/netinet6/in6_var.h @@ -648,6 +648,6 @@ int in6_embedscope __P((struct in6_addr *, const struct sockaddr_in6 *, int in6_recoverscope __P((struct sockaddr_in6 *, const struct in6_addr *, struct ifnet *)); void in6_clearscope __P((struct in6_addr *)); -#endif KERNEL_PRIVATE +#endif /* KERNEL_PRIVATE */ -#endif _NETINET6_IN6_VAR_H_ +#endif /* _NETINET6_IN6_VAR_H_ */ diff --git a/bsd/netinet6/ip6_input.c b/bsd/netinet6/ip6_input.c index ccfce46ed..64221cf42 100644 --- a/bsd/netinet6/ip6_input.c +++ b/bsd/netinet6/ip6_input.c @@ -211,11 +211,9 @@ ip6_init() } ip6_mutex_grp_attr = lck_grp_attr_alloc_init(); - lck_grp_attr_setdefault(ip6_mutex_grp_attr); ip6_mutex_grp = lck_grp_alloc_init("ip6", ip6_mutex_grp_attr); ip6_mutex_attr = lck_attr_alloc_init(); - lck_attr_setdefault(ip6_mutex_attr); if ((ip6_mutex = lck_mtx_alloc_init(ip6_mutex_grp, ip6_mutex_attr)) == NULL) { printf("ip6_init: can't alloc ip6_mutex\n"); diff --git a/bsd/netinet6/ipcomp.h b/bsd/netinet6/ipcomp.h index 41ab61b85..8d17dc7de 100644 --- a/bsd/netinet6/ipcomp.h +++ b/bsd/netinet6/ipcomp.h @@ -64,7 +64,7 @@ struct ipsecrequest; extern const struct ipcomp_algorithm *ipcomp_algorithm_lookup(int); extern void ipcomp4_input(struct mbuf *, int); extern int 
ipcomp4_output(struct mbuf *, struct ipsecrequest *); -#endif KERNEL_PRIVATE -#endif KERNEL +#endif /* KERNEL_PRIVATE */ +#endif /* KERNEL */ -#endif _NETINET6_IPCOMP_H_ +#endif /* _NETINET6_IPCOMP_H_ */ diff --git a/bsd/netinet6/ipsec.h b/bsd/netinet6/ipsec.h index 2a4f6dcef..154ff39a1 100644 --- a/bsd/netinet6/ipsec.h +++ b/bsd/netinet6/ipsec.h @@ -345,4 +345,4 @@ extern char *ipsec_dump_policy(caddr_t, char *); extern const char *ipsec_strerror(void); #endif KERNEL -#endif _NETINET6_IPSEC_H_ +#endif /* _NETINET6_IPSEC_H_ */ diff --git a/bsd/netinet6/pim6_var.h b/bsd/netinet6/pim6_var.h index 1cb8ec648..c90b9bdde 100644 --- a/bsd/netinet6/pim6_var.h +++ b/bsd/netinet6/pim6_var.h @@ -52,7 +52,6 @@ struct pim6stat { u_quad_t pim6s_snd_registers; /* sent registers */ }; -extern struct pim6stat pim6stat; /* diff --git a/bsd/netkey/key.c b/bsd/netkey/key.c index 2c635da4c..aee29ba54 100644 --- a/bsd/netkey/key.c +++ b/bsd/netkey/key.c @@ -526,7 +526,6 @@ key_init(void) sadb_mutex_grp_attr = lck_grp_attr_alloc_init(); sadb_mutex_grp = lck_grp_alloc_init("sadb", sadb_mutex_grp_attr); sadb_mutex_attr = lck_attr_alloc_init(); - lck_attr_setdefault(sadb_mutex_attr); if ((sadb_mutex = lck_mtx_alloc_init(sadb_mutex_grp, sadb_mutex_attr)) == NULL) { printf("key_init: can't alloc sadb_mutex\n"); diff --git a/bsd/nfs/nfs.h b/bsd/nfs/nfs.h index 9da9e3b8c..8458d6313 100644 --- a/bsd/nfs/nfs.h +++ b/bsd/nfs/nfs.h @@ -94,6 +94,7 @@ #ifndef NFS_MAXDIRATTRTIMO #define NFS_MAXDIRATTRTIMO 60 #endif +#define NFS_IOSIZE (256 * 1024) /* suggested I/O size */ #define NFS_WSIZE 16384 /* Def. write data size <= 16K */ #define NFS_RSIZE 16384 /* Def. read data size <= 16K */ #define NFS_DGRAM_WSIZE 8192 /* UDP Def. write data size <= 8K */ @@ -215,17 +216,13 @@ struct nfs_args3 { * grow when we're dealing with a 64-bit process. 
* WARNING - keep in sync with nfs_args */ -#if __DARWIN_ALIGN_NATURAL -#pragma options align=natural -#endif - struct user_nfs_args { int version; /* args structure version number */ - user_addr_t addr; /* file server address */ + user_addr_t addr __attribute((aligned(8))); /* file server address */ int addrlen; /* length of address */ int sotype; /* Socket type */ int proto; /* and Protocol */ - user_addr_t fh; /* File handle to be mounted */ + user_addr_t fh __attribute((aligned(8))); /* File handle to be mounted */ int fhsize; /* Size, in bytes, of fh */ int flags; /* flags */ int wsize; /* write size in bytes */ @@ -237,7 +234,7 @@ struct user_nfs_args { int readahead; /* # of blocks to readahead */ int leaseterm; /* obsolete: Term (sec) of lease */ int deadthresh; /* obsolete: Retrans threshold */ - user_addr_t hostname; /* server's name */ + user_addr_t hostname __attribute((aligned(8))); /* server's name */ /* NFS_ARGSVERSION 3 ends here */ int acregmin; /* reg file min attr cache timeout */ int acregmax; /* reg file max attr cache timeout */ @@ -246,11 +243,11 @@ struct user_nfs_args { }; struct user_nfs_args3 { int version; /* args structure version number */ - user_addr_t addr; /* file server address */ + user_addr_t addr __attribute((aligned(8))); /* file server address */ int addrlen; /* length of address */ int sotype; /* Socket type */ int proto; /* and Protocol */ - user_addr_t fh; /* File handle to be mounted */ + user_addr_t fh __attribute((aligned(8))); /* File handle to be mounted */ int fhsize; /* Size, in bytes, of fh */ int flags; /* flags */ int wsize; /* write size in bytes */ @@ -262,13 +259,9 @@ struct user_nfs_args3 { int readahead; /* # of blocks to readahead */ int leaseterm; /* obsolete: Term (sec) of lease */ int deadthresh; /* obsolete: Retrans threshold */ - user_addr_t hostname; /* server's name */ + user_addr_t hostname __attribute((aligned(8))); /* server's name */ }; -#if __DARWIN_ALIGN_NATURAL -#pragma options align=reset 
-#endif - #endif // KERNEL /* @@ -340,20 +333,12 @@ struct nfsd_args { * grow when we're dealing with a 64-bit process. * WARNING - keep in sync with nfsd_args */ -#if __DARWIN_ALIGN_NATURAL -#pragma options align=natural -#endif - struct user_nfsd_args { int sock; /* Socket to serve */ - user_addr_t name; /* Client addr for connection based sockets */ + user_addr_t name __attribute((aligned(8))); /* Client addr for connection based sockets */ int namelen; /* Length of name */ }; -#if __DARWIN_ALIGN_NATURAL -#pragma options align=reset -#endif - #endif // KERNEL struct nfsd_srvargs { @@ -433,10 +418,6 @@ struct nfs_export_args { #ifdef KERNEL /* LP64 version of export_args */ -#if __DARWIN_ALIGN_NATURAL -#pragma options align=natural -#endif - struct user_nfs_export_args { uint32_t nxa_fsid; /* export FS ID */ uint32_t nxa_expid; /* export ID */ @@ -447,10 +428,6 @@ struct user_nfs_export_args { user_addr_t nxa_nets; /* array of net args */ }; -#if __DARWIN_ALIGN_NATURAL -#pragma options align=reset -#endif - #endif /* KERNEL */ /* nfs export arg flags */ diff --git a/bsd/nfs/nfs_bio.c b/bsd/nfs/nfs_bio.c index b4a0836a2..482d4aef4 100644 --- a/bsd/nfs/nfs_bio.c +++ b/bsd/nfs/nfs_bio.c @@ -154,7 +154,6 @@ void nfs_nbinit(void) { nfs_buf_lck_grp_attr = lck_grp_attr_alloc_init(); - lck_grp_attr_setstat(nfs_buf_lck_grp_attr); nfs_buf_lck_grp = lck_grp_alloc_init("nfs_buf", nfs_buf_lck_grp_attr); nfs_buf_lck_attr = lck_attr_alloc_init(); @@ -628,6 +627,7 @@ nfs_buf_get( struct nfsbuf **bpp) { struct nfsnode *np = VTONFS(vp); + struct nfsmount *nmp = VFSTONFS(vnode_mount(vp)); struct nfsbuf *bp; int biosize, bufsize; kauth_cred_t cred; @@ -640,10 +640,14 @@ nfs_buf_get( *bpp = NULL; bufsize = size; - if (bufsize > MAXBSIZE) - panic("nfs_buf_get: buffer larger than MAXBSIZE requested"); + if (bufsize > NFS_MAXBSIZE) + panic("nfs_buf_get: buffer larger than NFS_MAXBSIZE requested"); - biosize = vfs_statfs(vnode_mount(vp))->f_iosize; + if (!nmp) { + FSDBG_BOT(541, vp, 
blkno, 0, ENXIO); + return (ENXIO); + } + biosize = nmp->nm_biosize; if (UBCINVALID(vp) || !UBCINFOEXISTS(vp)) { operation = NBLK_META; @@ -973,7 +977,9 @@ nfs_buf_release(struct nfsbuf *bp, int freeup) panic("ubc_upl_unmap failed"); bp->nb_data = NULL; } - if (bp->nb_flags & (NB_ERROR | NB_INVAL | NB_NOCACHE)) { + /* abort pages if error, invalid, or non-needcommit nocache */ + if ((bp->nb_flags & (NB_ERROR | NB_INVAL)) || + ((bp->nb_flags & NB_NOCACHE) && !(bp->nb_flags & (NB_NEEDCOMMIT | NB_DELWRI)))) { if (bp->nb_flags & (NB_READ | NB_INVAL | NB_NOCACHE)) upl_flags = UPL_ABORT_DUMP_PAGES; else @@ -1003,10 +1009,9 @@ nfs_buf_release(struct nfsbuf *bp, int freeup) /* was this the last buffer in the file? */ if (NBOFF(bp) + bp->nb_bufsize > (off_t)(VTONFS(vp)->n_size)) { /* if so, invalidate all pages of last buffer past EOF */ - int biosize = vfs_statfs(vnode_mount(vp))->f_iosize; off_t start, end; start = trunc_page_64(VTONFS(vp)->n_size) + PAGE_SIZE_64; - end = trunc_page_64(NBOFF(bp) + biosize); + end = trunc_page_64(NBOFF(bp) + bp->nb_bufsize); if (end > start) { if (!(rv = ubc_sync_range(vp, start, end, UBC_INVALIDATE))) printf("nfs_buf_release(): ubc_sync_range failed!\n"); @@ -1031,8 +1036,9 @@ nfs_buf_release(struct nfsbuf *bp, int freeup) wakeup_buffer = 1; } - /* If it's not cacheable, or an error, mark it invalid. */ - if (ISSET(bp->nb_flags, (NB_NOCACHE|NB_ERROR))) + /* If it's non-needcommit nocache, or an error, mark it invalid. */ + if (ISSET(bp->nb_flags, NB_ERROR) || + (ISSET(bp->nb_flags, NB_NOCACHE) && !ISSET(bp->nb_flags, (NB_NEEDCOMMIT | NB_DELWRI)))) SET(bp->nb_flags, NB_INVAL); if ((bp->nb_bufsize <= 0) || ISSET(bp->nb_flags, NB_INVAL)) { @@ -1089,7 +1095,7 @@ nfs_buf_release(struct nfsbuf *bp, int freeup) NFSBUFCNTCHK(1); /* Unlock the buffer. 
*/ - CLR(bp->nb_flags, (NB_ASYNC | NB_NOCACHE | NB_STABLE | NB_IOD)); + CLR(bp->nb_flags, (NB_ASYNC | NB_STABLE | NB_IOD)); CLR(bp->nb_lflags, NBL_BUSY); FSDBG_BOT(548, bp, NBOFF(bp), bp->nb_flags, bp->nb_data); @@ -1418,9 +1424,10 @@ nfs_bioread( return (EINVAL); } + biosize = nmp->nm_biosize; if ((nmp->nm_flag & NFSMNT_NFSV3) && !(nmp->nm_state & NFSSTA_GOTFSINFO)) nfs_fsinfo(nmp, vp, cred, p); - biosize = vfs_statfs(vnode_mount(vp))->f_iosize; + vtype = vnode_vtype(vp); /* * For nfs, cache consistency can only be maintained approximately. @@ -1982,9 +1989,11 @@ nfs_write(ap) FSDBG_BOT(515, vp, uio->uio_offset, uio_uio_resid(uio), np->n_error); return (np->n_error); } - if ((nmp->nm_flag & NFSMNT_NFSV3) && - !(nmp->nm_state & NFSSTA_GOTFSINFO)) - (void)nfs_fsinfo(nmp, vp, cred, p); + + biosize = nmp->nm_biosize; + if ((nmp->nm_flag & NFSMNT_NFSV3) && !(nmp->nm_state & NFSSTA_GOTFSINFO)) + nfs_fsinfo(nmp, vp, cred, p); + if (ioflag & (IO_APPEND | IO_SYNC)) { if (np->n_flag & NMODIFIED) { NATTRINVALIDATE(np); @@ -2017,8 +2026,6 @@ nfs_write(ap) return (0); } - biosize = vfs_statfs(vnode_mount(vp))->f_iosize; - if (vnode_isnocache(vp)) { if (!(np->n_flag & NNOCACHE)) { if (NVALIDBUFS(np)) { @@ -2059,7 +2066,7 @@ nfs_write(ap) NFS_BUF_MAP(bp); if (np->n_flag & NNOCACHE) - SET(bp->nb_flags, (NB_NOCACHE|NB_STABLE)); + SET(bp->nb_flags, NB_NOCACHE); if (bp->nb_wcred == NOCRED) { kauth_cred_ref(cred); @@ -2201,7 +2208,7 @@ nfs_write(ap) char *d; int i; if (np->n_flag & NNOCACHE) - SET(eofbp->nb_flags, (NB_NOCACHE|NB_STABLE)); + SET(eofbp->nb_flags, NB_NOCACHE); NFS_BUF_MAP(eofbp); FSDBG(516, eofbp, eofoff, biosize - eofoff, 0xe0fff01e); d = eofbp->nb_data; @@ -2466,9 +2473,14 @@ nfs_write(ap) } while (uio_uio_resid(uio) > 0 && n > 0); + if (np->n_flag & NNOCACHE) { + /* make sure all the buffers are flushed out */ + error = nfs_flush(vp, MNT_WAIT, cred, p, 0); + } + np->n_flag &= ~NWRBUSY; - FSDBG_BOT(515, vp, uio->uio_offset, uio_uio_resid(uio), 0); - return (0); + 
FSDBG_BOT(515, vp, uio->uio_offset, uio_uio_resid(uio), error); + return (error); } /* @@ -3056,10 +3068,10 @@ nfs_doio(struct nfsbuf *bp, kauth_cred_t cr, proc_t p) /* compare page mask to nb_dirty; if there are other dirty pages */ /* then write FILESYNC; otherwise, write UNSTABLE if async and */ - /* not needcommit/nocache/call; otherwise write FILESYNC */ + /* not needcommit/stable; otherwise write FILESYNC */ if (bp->nb_dirty & ~pagemask) iomode = NFSV3WRITE_FILESYNC; - else if ((bp->nb_flags & (NB_ASYNC | NB_NEEDCOMMIT | NB_NOCACHE | NB_STABLE)) == NB_ASYNC) + else if ((bp->nb_flags & (NB_ASYNC | NB_NEEDCOMMIT | NB_STABLE)) == NB_ASYNC) iomode = NFSV3WRITE_UNSTABLE; else iomode = NFSV3WRITE_FILESYNC; @@ -3109,7 +3121,7 @@ nfs_doio(struct nfsbuf *bp, kauth_cred_t cr, proc_t p) * NB_NEEDCOMMIT flags. */ if (error == EINTR || (!error && bp->nb_flags & NB_NEEDCOMMIT)) { - CLR(bp->nb_flags, NB_INVAL | NB_NOCACHE); + CLR(bp->nb_flags, NB_INVAL); if (!ISSET(bp->nb_flags, NB_DELWRI)) { SET(bp->nb_flags, NB_DELWRI); OSAddAtomic(1, (SInt32*)&nfs_nbdwrite); diff --git a/bsd/nfs/nfs_node.c b/bsd/nfs/nfs_node.c index 36769aad4..398a83c77 100644 --- a/bsd/nfs/nfs_node.c +++ b/bsd/nfs/nfs_node.c @@ -93,7 +93,6 @@ nfs_nhinit(void) nfsnodehashtbl = hashinit(desiredvnodes, M_NFSNODE, &nfsnodehash); nfs_node_hash_lck_grp_attr = lck_grp_attr_alloc_init(); - lck_grp_attr_setstat(nfs_node_hash_lck_grp_attr); nfs_node_hash_lck_grp = lck_grp_alloc_init("nfs_node_hash", nfs_node_hash_lck_grp_attr); nfs_node_hash_lck_attr = lck_attr_alloc_init(); diff --git a/bsd/nfs/nfs_serv.c b/bsd/nfs/nfs_serv.c index d74605ded..ffad54c5e 100644 --- a/bsd/nfs/nfs_serv.c +++ b/bsd/nfs/nfs_serv.c @@ -1165,11 +1165,7 @@ nfsrv_write(nfsd, slp, procp, mrq) *tl++ = txdr_unsigned(stable); else *tl++ = txdr_unsigned(NFSV3WRITE_FILESYNC); - /* - * Actually, there is no need to txdr these fields, - * but it may make the values more human readable, - * for debugging purposes. 
- */ + /* write verifier */ *tl++ = txdr_unsigned(boottime_sec()); *tl = txdr_unsigned(0); } else { @@ -1469,11 +1465,7 @@ nfsrv_writegather(ndp, slp, procp, mrq) nfsm_build(tl, u_long *, 4 * NFSX_UNSIGNED); *tl++ = txdr_unsigned(nfsd->nd_len); *tl++ = txdr_unsigned(swp->nd_stable); - /* - * Actually, there is no need to txdr these fields, - * but it may make the values more human readable, - * for debugging purposes. - */ + /* write verifier */ *tl++ = txdr_unsigned(boottime_sec()); *tl = txdr_unsigned(0); } else { @@ -3727,6 +3719,7 @@ nfsrv_readdirplus(nfsd, slp, procp, mrq) vnode_t vp, nvp; struct flrep fl; struct nfs_filehandle dnfh, *nfhp = (struct nfs_filehandle *)&fl.fl_fhsize; + u_long fhsize; struct nfs_export *nx; struct nfs_export_options *nxo; uio_t auio; @@ -3938,7 +3931,8 @@ nfsrv_readdirplus(nfsd, slp, procp, mrq) */ fp = (struct nfs_fattr *)&fl.fl_fattr; nfsm_srvfillattr(vap, fp); - fl.fl_fhsize = txdr_unsigned(nfhp->nfh_len); + fhsize = nfhp->nfh_len; + fl.fl_fhsize = txdr_unsigned(fhsize); fl.fl_fhok = nfs_true; fl.fl_postopok = nfs_true; if (vnopflag & VNODE_READDIR_SEEKOFF32) @@ -3983,7 +3977,7 @@ nfsrv_readdirplus(nfsd, slp, procp, mrq) /* * Now copy the flrep structure out. 
*/ - xfer = sizeof(struct flrep) - sizeof(fl.fl_nfh) + fl.fl_fhsize; + xfer = sizeof(struct flrep) - sizeof(fl.fl_nfh) + fhsize; cp = (caddr_t)&fl; while (xfer > 0) { nfsm_clget; diff --git a/bsd/nfs/nfs_socket.c b/bsd/nfs/nfs_socket.c index 2da38e94c..19372a501 100644 --- a/bsd/nfs/nfs_socket.c +++ b/bsd/nfs/nfs_socket.c @@ -310,7 +310,6 @@ nfs_bind_resv_nopriv(struct nfsmount *nmp) if (nfs_bind_resv_thread_state < NFS_BIND_RESV_THREAD_STATE_RUNNING) { if (nfs_bind_resv_thread_state < NFS_BIND_RESV_THREAD_STATE_INITTED) { nfs_bind_resv_lck_grp_attr = lck_grp_attr_alloc_init(); - lck_grp_attr_setstat(nfs_bind_resv_lck_grp_attr); nfs_bind_resv_lck_grp = lck_grp_alloc_init("nfs_bind_resv", nfs_bind_resv_lck_grp_attr); nfs_bind_resv_lck_attr = lck_attr_alloc_init(); nfs_bind_resv_mutex = lck_mtx_alloc_init(nfs_bind_resv_lck_grp, nfs_bind_resv_lck_attr); diff --git a/bsd/nfs/nfs_srvcache.c b/bsd/nfs/nfs_srvcache.c index a4ce111ae..c15362dad 100644 --- a/bsd/nfs/nfs_srvcache.c +++ b/bsd/nfs/nfs_srvcache.c @@ -164,7 +164,6 @@ nfsrv_initcache() { /* init nfs server request cache mutex */ nfsrv_reqcache_lck_grp_attr = lck_grp_attr_alloc_init(); - lck_grp_attr_setstat(nfsrv_reqcache_lck_grp_attr); nfsrv_reqcache_lck_grp = lck_grp_alloc_init("nfsrv_reqcache", nfsrv_reqcache_lck_grp_attr); nfsrv_reqcache_lck_attr = lck_attr_alloc_init(); nfsrv_reqcache_mutex = lck_mtx_alloc_init(nfsrv_reqcache_lck_grp, nfsrv_reqcache_lck_attr); diff --git a/bsd/nfs/nfs_subs.c b/bsd/nfs/nfs_subs.c index cfc3be25e..3b29e0670 100644 --- a/bsd/nfs/nfs_subs.c +++ b/bsd/nfs/nfs_subs.c @@ -1214,7 +1214,6 @@ nfs_init(struct vfsconf *vfsp) } /* init nfsiod mutex */ nfs_iod_lck_grp_attr = lck_grp_attr_alloc_init(); - lck_grp_attr_setstat(nfs_iod_lck_grp_attr); nfs_iod_lck_grp = lck_grp_alloc_init("nfs_iod", nfs_iod_lck_grp_attr); nfs_iod_lck_attr = lck_attr_alloc_init(); nfs_iod_mutex = lck_mtx_alloc_init(nfs_iod_lck_grp, nfs_iod_lck_attr); @@ -1226,7 +1225,6 @@ nfs_init(struct vfsconf *vfsp) #ifndef 
NFS_NOSERVER /* init nfsd mutex */ nfsd_lck_grp_attr = lck_grp_attr_alloc_init(); - lck_grp_attr_setstat(nfsd_lck_grp_attr); nfsd_lck_grp = lck_grp_alloc_init("nfsd", nfsd_lck_grp_attr); nfsd_lck_attr = lck_attr_alloc_init(); nfsd_mutex = lck_mtx_alloc_init(nfsd_lck_grp, nfsd_lck_attr); @@ -2536,9 +2534,9 @@ nfsrv_export(struct user_nfs_export_args *unxa, struct vfs_context *ctx) } /* grab file handle */ - nx->nx_fh.nfh_xh.nxh_version = NFS_FH_VERSION; - nx->nx_fh.nfh_xh.nxh_fsid = nx->nx_fs->nxfs_id; - nx->nx_fh.nfh_xh.nxh_expid = nx->nx_id; + nx->nx_fh.nfh_xh.nxh_version = htonl(NFS_FH_VERSION); + nx->nx_fh.nfh_xh.nxh_fsid = htonl(nx->nx_fs->nxfs_id); + nx->nx_fh.nfh_xh.nxh_expid = htonl(nx->nx_id); nx->nx_fh.nfh_xh.nxh_flags = 0; nx->nx_fh.nfh_xh.nxh_reserved = 0; nx->nx_fh.nfh_len = NFS_MAX_FID_SIZE; @@ -2650,11 +2648,15 @@ static struct nfs_export * nfsrv_fhtoexport(struct nfs_filehandle *nfhp) { struct nfs_export *nx; - nx = NFSEXPHASH(nfhp->nfh_xh.nxh_fsid, nfhp->nfh_xh.nxh_expid)->lh_first; + uint32_t fsid, expid; + + fsid = ntohl(nfhp->nfh_xh.nxh_fsid); + expid = ntohl(nfhp->nfh_xh.nxh_expid); + nx = NFSEXPHASH(fsid, expid)->lh_first; for (; nx; nx = LIST_NEXT(nx, nx_hash)) { - if (nx->nx_fs->nxfs_id != nfhp->nfh_xh.nxh_fsid) + if (nx->nx_fs->nxfs_id != fsid) continue; - if (nx->nx_id != nfhp->nfh_xh.nxh_expid) + if (nx->nx_id != expid) continue; break; } @@ -2675,12 +2677,14 @@ nfsrv_fhtovp( { int error; struct mount *mp; + uint32_t v; *vpp = NULL; *nxp = NULL; *nxop = NULL; - if (nfhp->nfh_xh.nxh_version != NFS_FH_VERSION) { + v = ntohl(nfhp->nfh_xh.nxh_version); + if (v != NFS_FH_VERSION) { /* file handle format not supported */ return (ESTALE); } @@ -2688,7 +2692,8 @@ nfsrv_fhtovp( return (EBADRPC); if (nfhp->nfh_len < (int)sizeof(nfhp->nfh_xh)) return (ESTALE); - if (nfhp->nfh_xh.nxh_flags & NXHF_INVALIDFH) + v = ntohs(nfhp->nfh_xh.nxh_flags); + if (v & NXHF_INVALIDFH) return (ESTALE); /* XXX Revisit when enabling WebNFS */ @@ -2784,9 +2789,9 @@ 
nfsrv_vptofh( { int error; - nfhp->nfh_xh.nxh_version = NFS_FH_VERSION; - nfhp->nfh_xh.nxh_fsid = nx->nx_fs->nxfs_id; - nfhp->nfh_xh.nxh_expid = nx->nx_id; + nfhp->nfh_xh.nxh_version = htonl(NFS_FH_VERSION); + nfhp->nfh_xh.nxh_fsid = htonl(nx->nx_fs->nxfs_id); + nfhp->nfh_xh.nxh_expid = htonl(nx->nx_id); nfhp->nfh_xh.nxh_flags = 0; nfhp->nfh_xh.nxh_reserved = 0; @@ -2797,7 +2802,7 @@ nfsrv_vptofh( if (dnfhp && nfsrv_fhmatch(dnfhp, &nx->nx_fh)) { nfhp->nfh_len = v2 ? NFSX_V2FH : sizeof(nfhp->nfh_xh); nfhp->nfh_xh.nxh_fidlen = 0; - nfhp->nfh_xh.nxh_flags = NXHF_INVALIDFH; + nfhp->nfh_xh.nxh_flags = htons(NXHF_INVALIDFH); return (0); } diff --git a/bsd/nfs/nfs_syscalls.c b/bsd/nfs/nfs_syscalls.c index 8b685035b..89d01ada8 100644 --- a/bsd/nfs/nfs_syscalls.c +++ b/bsd/nfs/nfs_syscalls.c @@ -220,9 +220,9 @@ getfh(proc_t p, struct getfh_args *uap, __unused int *retval) } bzero(&nfh, sizeof(nfh)); - nfh.nfh_xh.nxh_version = NFS_FH_VERSION; - nfh.nfh_xh.nxh_fsid = nxfs->nxfs_id; - nfh.nfh_xh.nxh_expid = nx->nx_id; + nfh.nfh_xh.nxh_version = htonl(NFS_FH_VERSION); + nfh.nfh_xh.nxh_fsid = htonl(nxfs->nxfs_id); + nfh.nfh_xh.nxh_expid = htonl(nx->nx_id); nfh.nfh_xh.nxh_flags = 0; nfh.nfh_xh.nxh_reserved = 0; nfh.nfh_len = NFS_MAX_FID_SIZE; diff --git a/bsd/nfs/nfs_vfsops.c b/bsd/nfs/nfs_vfsops.c index 944ce2be1..1fe75a4f5 100644 --- a/bsd/nfs/nfs_vfsops.c +++ b/bsd/nfs/nfs_vfsops.c @@ -125,7 +125,7 @@ static int nfs_tprintf_delay = NFS_TPRINTF_DELAY; SYSCTL_INT(_vfs_generic_nfs_client, NFS_TPRINTF_DELAY, nextdowndelay, CTLFLAG_RW, &nfs_tprintf_delay, 0, ""); -static int nfs_iosize(struct nfsmount *nmp); +static int nfs_biosize(struct nfsmount *); static int mountnfs(struct user_nfs_args *,mount_t,mbuf_t,proc_t,vnode_t *); static int nfs_mount(mount_t mp, vnode_t vp, user_addr_t data, vfs_context_t context); static int nfs_start(mount_t mp, int flags, vfs_context_t context); @@ -166,8 +166,8 @@ static int nfs_mount_diskless_private(struct nfs_dlmount *, const char *, int, 
vnode_t *, mount_t *); #endif /* NO_MOUNT_PRIVATE */ -static int nfs_iosize(nmp) - struct nfsmount* nmp; +static int +nfs_biosize(struct nfsmount *nmp) { int iosize; @@ -233,7 +233,7 @@ nfs_statfs(mount_t mp, struct vfsstatfs *sbp, vfs_context_t context) nfsm_dissect(sfp, struct nfs_statfs *, NFSX_STATFS(v3)); sbp->f_flags = nmp->nm_flag; - sbp->f_iosize = nfs_iosize(nmp); + sbp->f_iosize = NFS_IOSIZE; if (v3) { /* * Adjust block size to get total block count to fit in a long. @@ -1114,8 +1114,8 @@ mountnfs( } if (nmp->nm_wsize > maxio) nmp->nm_wsize = maxio; - if (nmp->nm_wsize > MAXBSIZE) - nmp->nm_wsize = MAXBSIZE; + if (nmp->nm_wsize > NFS_MAXBSIZE) + nmp->nm_wsize = NFS_MAXBSIZE; if ((argp->flags & NFSMNT_RSIZE) && argp->rsize > 0) { nmp->nm_rsize = argp->rsize; @@ -1126,8 +1126,8 @@ mountnfs( } if (nmp->nm_rsize > maxio) nmp->nm_rsize = maxio; - if (nmp->nm_rsize > MAXBSIZE) - nmp->nm_rsize = MAXBSIZE; + if (nmp->nm_rsize > NFS_MAXBSIZE) + nmp->nm_rsize = NFS_MAXBSIZE; if ((argp->flags & NFSMNT_READDIRSIZE) && argp->readdirsize > 0) { nmp->nm_readdirsize = argp->readdirsize; @@ -1224,7 +1224,8 @@ mountnfs( */ if (nmp->nm_flag & NFSMNT_NFSV3) nfs_fsinfo(nmp, *vpp, proc_ucred(p), p); - vfs_statfs(mp)->f_iosize = nfs_iosize(nmp); + nmp->nm_biosize = nfs_biosize(nmp); + vfs_statfs(mp)->f_iosize = NFS_IOSIZE; /* * V3 mounts give us a (relatively) reliable remote access(2) diff --git a/bsd/nfs/nfs_vnops.c b/bsd/nfs/nfs_vnops.c index 7704db2cb..b44eb45f2 100644 --- a/bsd/nfs/nfs_vnops.c +++ b/bsd/nfs/nfs_vnops.c @@ -1031,8 +1031,10 @@ nfs_setattr(ap) { vnode_t vp = ap->a_vp; struct nfsnode *np = VTONFS(vp); + struct nfsmount *nmp; struct vnode_attr *vap = ap->a_vap; int error = 0; + int biosize; u_quad_t tsize; kauth_cred_t cred; proc_t p; @@ -1040,6 +1042,10 @@ nfs_setattr(ap) #ifndef nolint tsize = (u_quad_t)0; #endif + nmp = VFSTONFS(vnode_mount(vp)); + if (!nmp) + return (ENXIO); + biosize = nmp->nm_biosize; /* Setting of flags is not supported. 
*/ if (VATTR_IS_ACTIVE(vap, va_flags)) @@ -1101,10 +1107,9 @@ nfs_setattr(ap) } } else if (np->n_size > vap->va_data_size) { /* shrinking? */ daddr64_t obn, bn; - int biosize, neweofoff, mustwrite; + int neweofoff, mustwrite; struct nfsbuf *bp; - biosize = vfs_statfs(vnode_mount(vp))->f_iosize; obn = (np->n_size - 1) / biosize; bn = vap->va_data_size / biosize; for ( ; obn >= bn; obn--) { @@ -4744,9 +4749,9 @@ nfs_pagein(ap) UPL_ABORT_ERROR | UPL_ABORT_FREE_ON_EMPTY); return (ENXIO); } + biosize = nmp->nm_biosize; if ((nmp->nm_flag & NFSMNT_NFSV3) && !(nmp->nm_state & NFSSTA_GOTFSINFO)) - (void)nfs_fsinfo(nmp, vp, cred, p); - biosize = vfs_statfs(vnode_mount(vp))->f_iosize; + nfs_fsinfo(nmp, vp, cred, p); plinfo = ubc_upl_pageinfo(pl); ubc_upl_map(pl, &ioaddr); @@ -4883,7 +4888,7 @@ nfs_pageout(ap) ubc_upl_abort(pl, UPL_ABORT_DUMP_PAGES|UPL_ABORT_FREE_ON_EMPTY); return (ENXIO); } - biosize = vfs_statfs(vnode_mount(vp))->f_iosize; + biosize = nmp->nm_biosize; /* * Check to see whether the buffer is incore. 
@@ -5140,12 +5145,11 @@ nfs_blktooff(ap) { int biosize; vnode_t vp = ap->a_vp; - mount_t mp = vnode_mount(vp); + struct nfsmount *nmp = VFSTONFS(vnode_mount(vp)); - if (!mp) + if (!nmp) return (ENXIO); - - biosize = vfs_statfs(mp)->f_iosize; + biosize = nmp->nm_biosize; *ap->a_offset = (off_t)(ap->a_lblkno * biosize); @@ -5163,12 +5167,11 @@ nfs_offtoblk(ap) { int biosize; vnode_t vp = ap->a_vp; - mount_t mp = vnode_mount(vp); + struct nfsmount *nmp = VFSTONFS(vnode_mount(vp)); - if (!mp) + if (!nmp) return (ENXIO); - - biosize = vfs_statfs(mp)->f_iosize; + biosize = nmp->nm_biosize; *ap->a_lblkno = (daddr64_t)(ap->a_offset / biosize); diff --git a/bsd/nfs/nfsmount.h b/bsd/nfs/nfsmount.h index 0c97699ad..8836c870d 100644 --- a/bsd/nfs/nfsmount.h +++ b/bsd/nfs/nfsmount.h @@ -90,6 +90,7 @@ struct nfsmount { int nm_timeouts; /* Request timeouts */ int nm_rsize; /* Max size of read rpc */ int nm_wsize; /* Max size of write rpc */ + int nm_biosize; /* buffer I/O size */ int nm_readdirsize; /* Size of a readdir rpc */ int nm_readahead; /* Num. of blocks to readahead */ int nm_acregmin; /* reg file min attr cache timeout */ diff --git a/bsd/nfs/nfsnode.h b/bsd/nfs/nfsnode.h index ada189445..ea95a3a07 100644 --- a/bsd/nfs/nfsnode.h +++ b/bsd/nfs/nfsnode.h @@ -125,6 +125,8 @@ struct nfsbuf { void * nb_pagelist; /* upl */ }; +#define NFS_MAXBSIZE (32 * PAGE_SIZE) /* valid/dirty page masks limit buffer size */ + /* * These flags are kept in b_lflags... 
* nfs_buf_mutex must be held before examining/updating diff --git a/bsd/ppc/_types.h b/bsd/ppc/_types.h index 337362194..c607bba87 100644 --- a/bsd/ppc/_types.h +++ b/bsd/ppc/_types.h @@ -33,7 +33,7 @@ typedef __signed char __int8_t; typedef char __int8_t; #endif /* !__GNUC__ */ typedef unsigned char __uint8_t; -typedef unsigned short __int16_t; +typedef short __int16_t; typedef unsigned short __uint16_t; typedef int __int32_t; typedef unsigned int __uint32_t; diff --git a/bsd/ppc/types.h b/bsd/ppc/types.h index 58b77b5a3..543bfed53 100644 --- a/bsd/ppc/types.h +++ b/bsd/ppc/types.h @@ -118,6 +118,9 @@ typedef int64_t user_time_t; #define USER_ADDR_NULL ((user_addr_t) 0) #define CAST_USER_ADDR_T(a_ptr) ((user_addr_t)((uintptr_t)(a_ptr))) +/* This defines the size of syscall arguments after copying into the kernel: */ +typedef u_int64_t syscall_arg_t; + #ifndef __offsetof #define __offsetof(type, field) ((size_t)(&((type *)0)->field)) #endif diff --git a/bsd/sys/Makefile b/bsd/sys/Makefile index 946ec4094..5e10aaed1 100644 --- a/bsd/sys/Makefile +++ b/bsd/sys/Makefile @@ -36,7 +36,7 @@ DATAFILES = \ ttydefaults.h ttydev.h types.h ubc.h ucontext.h ucred.h uio.h un.h unistd.h unpcb.h \ user.h utfconv.h utsname.h vadvise.h vcmd.h version.h \ vm.h vmmeter.h vmparam.h vnioctl.h vnode.h vnode_if.h vstat.h wait.h xattr.h \ - _types.h _endian.h domain.h protosw.h + _types.h _endian.h domain.h protosw.h # Only in the framework PrivateHeader area PRIVATE_DATAFILES = \ @@ -46,6 +46,7 @@ PRIVATE_DATAFILES = \ shm_internal.h \ ux_exception.h \ ktrace.h \ + proc_info.h \ vnioctl.h # KERNELFILES will appear only in the kernel framework @@ -93,7 +94,7 @@ INSTALL_MI_LIST = ${DATAFILES} INSTALL_MI_DIR = sys -EXPORT_MI_LIST = ${KERNELFILES} ${PRIVATE_KERNELFILES} syscall.h ktrace.h linker_set.h +EXPORT_MI_LIST = ${KERNELFILES} ${PRIVATE_KERNELFILES} syscall.h ktrace.h linker_set.h bsdtask_info.h EXPORT_MI_DIR = sys diff --git a/bsd/sys/_endian.h b/bsd/sys/_endian.h index 
8d0c683b8..6e34f2e70 100644 --- a/bsd/sys/_endian.h +++ b/bsd/sys/_endian.h @@ -83,25 +83,10 @@ /* * Macros for network/external number representation conversion. */ -#if __DARWIN_BYTE_ORDER == __DARWIN_BIG_ENDIAN && !defined(lint) -#define ntohl(x) (x) -#define ntohs(x) (x) -#define htonl(x) (x) -#define htons(x) (x) - -#if defined(KERNEL) || !defined(_POSIX_C_SOURCE) -#define NTOHL(x) (x) -#define NTOHS(x) (x) -#define HTONL(x) (x) -#define HTONS(x) (x) -#endif /* defined(KERNEL) || !defined(_POSIX_C_SOURCE) */ - -#else - #if !defined(__ASSEMBLER__) #include -#include +#include __BEGIN_DECLS uint16_t ntohs(uint16_t); @@ -111,16 +96,11 @@ uint32_t htonl(uint32_t); __END_DECLS #endif /* !defined(__ASSEMBLER__) */ -#define ntohs(x) NXSwapBigShortToHost(x) -#define htons(x) NXSwapHostShortToBig(x) +#define ntohs(x) OSSwapBigToHostInt16(x) +#define htons(x) OSSwapHostToBigInt16(x) -#if defined(__LP64__) -#define ntohl(x) NXSwapBigIntToHost(x) -#define htonl(x) NXSwapHostIntToBig(x) -#else -#define ntohl(x) NXSwapBigLongToHost(x) -#define htonl(x) NXSwapHostLongToBig(x) -#endif /* defined(__LP64__) */ +#define ntohl(x) OSSwapBigToHostInt32(x) +#define htonl(x) OSSwapHostToBigInt32(x) #if defined(KERNEL) || !defined(_POSIX_C_SOURCE) #define NTOHL(x) (x) = ntohl((u_long)x) @@ -128,5 +108,4 @@ __END_DECLS #define HTONL(x) (x) = htonl((u_long)x) #define HTONS(x) (x) = htons((u_short)x) #endif /* defined(KERNEL) || !defined(_POSIX_C_SOURCE) */ -#endif /* __DARWIN_BYTE_ORDER != __DARWIN_BIG_ENDIAN || defined(lint) */ #endif /* !_SYS__ENDIAN_H_ */ diff --git a/bsd/sys/aio.h b/bsd/sys/aio.h index f2d41b32c..a8a149866 100644 --- a/bsd/sys/aio.h +++ b/bsd/sys/aio.h @@ -47,24 +47,16 @@ struct aiocb { // LP64todo - should this move? 
#ifdef KERNEL -#if __DARWIN_ALIGN_NATURAL -#pragma options align=natural -#endif - struct user_aiocb { int aio_fildes; /* File descriptor */ off_t aio_offset; /* File offset */ - user_addr_t aio_buf; /* Location of buffer */ + user_addr_t aio_buf __attribute((aligned(8))); /* Location of buffer */ user_size_t aio_nbytes; /* Length of transfer */ int aio_reqprio; /* Request priority offset */ - struct user_sigevent aio_sigevent; /* Signal number and value */ + struct user_sigevent aio_sigevent __attribute((aligned(8))); /* Signal number and value */ int aio_lio_opcode; /* Operation to be performed */ }; -#if __DARWIN_ALIGN_NATURAL -#pragma options align=reset -#endif - #endif // KERNEL /* diff --git a/bsd/sys/attr.h b/bsd/sys/attr.h index 80a7512c2..e2b97fb67 100644 --- a/bsd/sys/attr.h +++ b/bsd/sys/attr.h @@ -378,27 +378,19 @@ struct fssearchblock { */ // LP64todo - should this move? -#if __DARWIN_ALIGN_NATURAL -#pragma options align=natural -#endif - struct user_fssearchblock { user_addr_t returnattrs; user_addr_t returnbuffer; user_size_t returnbuffersize; user_ulong_t maxmatches; - struct timeval timelimit; - user_addr_t searchparams1; + struct user_timeval timelimit; + user_addr_t searchparams1 __attribute((aligned(8))); user_size_t sizeofsearchparams1; user_addr_t searchparams2; user_size_t sizeofsearchparams2; struct attrlist searchattrs; }; -#if __DARWIN_ALIGN_NATURAL -#pragma options align=reset -#endif - #endif // KERNEL diff --git a/bsd/sys/bsdtask_info.h b/bsd/sys/bsdtask_info.h new file mode 100644 index 000000000..a18c56fc8 --- /dev/null +++ b/bsd/sys/bsdtask_info.h @@ -0,0 +1,108 @@ + +/* + * Copyright (c) 2005 Apple Computer, Inc. All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * The contents of this file constitute Original Code as defined in and + * are subject to the Apple Public Source License Version 1.1 (the + * "License"). You may not use this file except in compliance with the + * License. 
Please obtain a copy of the License at + * http://www.apple.com/publicsource and read it before using this file. + * + * This Original Code and all software distributed under the License are + * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the + * License for the specific language governing rights and limitations + * under the License. + * + * @APPLE_LICENSE_HEADER_END@ + */ + +#ifndef _SYS_BSDTASK_INFO_H +#define _SYS_BSDTASK_INFO_H + +struct proc_taskinfo_internal { + uint64_t pti_virtual_size; /* virtual memory size (bytes) */ + uint64_t pti_resident_size; /* resident memory size (bytes) */ + uint64_t pti_total_user; /* total time */ + uint64_t pti_total_system; + uint64_t pti_threads_user; /* existing threads only */ + uint64_t pti_threads_system; + int32_t pti_policy; /* default policy for new threads */ + int32_t pti_faults; /* number of page faults */ + int32_t pti_pageins; /* number of actual pageins */ + int32_t pti_cow_faults; /* number of copy-on-write faults */ + int32_t pti_messages_sent; /* number of messages sent */ + int32_t pti_messages_received; /* number of messages received */ + int32_t pti_syscalls_mach; /* number of mach system calls */ + int32_t pti_syscalls_unix; /* number of unix system calls */ + int32_t pti_csw; /* number of context switches */ + int32_t pti_threadnum; /* number of threads in the task */ + int32_t pti_numrunning; /* number of running threads */ + int32_t pti_priority; /* task priority*/ +}; + + +struct proc_threadinfo_internal { + uint64_t pth_user_time; /* user run time */ + uint64_t pth_system_time; /* system run time */ + int32_t pth_cpu_usage; /* scaled cpu usage percentage */ + int32_t pth_policy; /* scheduling policy in effect */ + int32_t pth_run_state; /* run state (see below) */ + 
int32_t pth_flags; /* various flags (see below) */ + int32_t pth_sleep_time; /* number of seconds that thread */ + int32_t pth_curpri; /* cur priority*/ + int32_t pth_priority; /* priority*/ + int32_t pth_maxpriority; /* max priority*/ +}; + + + +struct proc_regioninfo_internal { + uint32_t pri_protection; + uint32_t pri_max_protection; + uint32_t pri_inheritance; + uint32_t pri_flags; /* shared, external pager, is submap */ + uint64_t pri_offset; + uint32_t pri_behavior; + uint32_t pri_user_wired_count; + uint32_t pri_user_tag; + uint32_t pri_pages_resident; + uint32_t pri_pages_shared_now_private; + uint32_t pri_pages_swapped_out; + uint32_t pri_pages_dirtied; + uint32_t pri_ref_count; + uint32_t pri_shadow_depth; + uint32_t pri_share_mode; + uint32_t pri_private_pages_resident; + uint32_t pri_shared_pages_resident; + uint32_t pri_obj_id; + uint64_t pri_address; + uint64_t pri_size; + uint32_t pri_depth; +}; + +#ifdef MACH_KERNEL_PRIVATE + +#define PROC_REGION_SUBMAP 1 +#define PROC_REGION_SHARED 2 + +void vm_map_region_top_walk(vm_map_entry_t entry, vm_region_top_info_t top); +void vm_map_region_walk(vm_map_t map, vm_map_offset_t a, vm_map_entry_t entry, vm_object_offset_t offset, vm_object_size_t range, vm_region_extended_info_t extended); +kern_return_t vnode_pager_get_object_vnode(memory_object_t mem_obj,uint32_t * vnodeaddr, uint32_t * vid); +extern uint32_t vnode_vid(void *vp); + +#endif /* MACH_KERNEL_PRIVATE */ + +extern int fill_procregioninfo(task_t t, uint64_t arg, struct proc_regioninfo_internal *pinfo, uint32_t *vp, uint32_t *vid); +void fill_taskprocinfo(task_t task, struct proc_taskinfo_internal * ptinfo); +int fill_taskthreadinfo(task_t task, uint64_t thaddr, struct proc_threadinfo_internal * ptinfo); +int fill_taskthreadlist(task_t task, void * buffer, int thcount); +int get_numthreads(task_t); + + +#endif /*_SYS_BSDTASK_INFO_H */ + diff --git a/bsd/sys/buf_internal.h b/bsd/sys/buf_internal.h index e06f99253..ec3a84dcc 100644 --- 
a/bsd/sys/buf_internal.h +++ b/bsd/sys/buf_internal.h @@ -74,7 +74,6 @@ #include -extern lck_mtx_t *buf_mtxp; #define NOLIST ((struct buf *)0x87654321) /* @@ -189,6 +188,8 @@ struct buf { extern int niobuf; /* The number of IO buffer headers for cluster IO */ extern int nbuf; /* The number of buffer headers */ extern struct buf *buf; /* The buffer headers. */ +extern int max_nbuf_headers; /* The max number of buffer headers */ +extern int nbuf_hashelements; /* The number of elements in bufhash */ /* diff --git a/bsd/sys/cdefs.h b/bsd/sys/cdefs.h index 46dc8ec7f..46607547e 100644 --- a/bsd/sys/cdefs.h +++ b/bsd/sys/cdefs.h @@ -327,7 +327,7 @@ # define __DARWIN_LDBL_COMPAT2(x) /* nothing */ # define __DARWIN_LONG_DOUBLE_IS_DOUBLE 1 # endif -#elif defined(__i386__) || defined(__ppc64__) +#elif defined(__i386__) || defined(__ppc64__) || defined(__x86_64__) # define __DARWIN_LDBL_COMPAT(x) /* nothing */ # define __DARWIN_LDBL_COMPAT2(x) /* nothing */ # define __DARWIN_LONG_DOUBLE_IS_DOUBLE 0 @@ -335,26 +335,4 @@ # error Unknown architecture #endif -/* - * Structure alignment control macros. These specify how certain - * shared structures should be aligned. Some may need backward - * compatible legacy (POWER) alignment, while others may need - * forward compatible (NATURAL) alignment. 
- */ -#if !defined(__DARWIN_ALIGN_POWER) -#if defined(__ppc64__) -#define __DARWIN_ALIGN_POWER 1 -#else -#define __DARWIN_ALIGN_POWER 0 -#endif -#endif /* __DARWIN_ALIGN_POWER */ - -#if !defined(__DARWIN_ALIGN_NATURAL) -#if defined(__ppc__) && defined(KERNEL) -#define __DARWIN_ALIGN_NATURAL 1 -#else -#define __DARWIN_ALIGN_NATURAL 0 -#endif -#endif /* __DARWIN_ALIGN_NATURAL */ - #endif /* !_CDEFS_H_ */ diff --git a/bsd/sys/dirent.h b/bsd/sys/dirent.h index 1b4d5e501..7a0732739 100644 --- a/bsd/sys/dirent.h +++ b/bsd/sys/dirent.h @@ -79,9 +79,7 @@ typedef __darwin_ino_t ino_t; /* inode number */ #define __DARWIN_MAXNAMLEN 255 -#if __DARWIN_ALIGN_POWER -#pragma options align=power -#endif +#pragma pack(4) struct dirent { ino_t d_ino; /* file number of entry */ @@ -91,9 +89,7 @@ struct dirent { char d_name[__DARWIN_MAXNAMLEN + 1]; /* name must be no longer than this */ }; -#if __DARWIN_ALIGN_POWER -#pragma options align=reset -#endif +#pragma pack() #ifdef KERNEL #include diff --git a/bsd/sys/disk.h b/bsd/sys/disk.h index 6e3d535ef..f7e627b64 100644 --- a/bsd/sys/disk.h +++ b/bsd/sys/disk.h @@ -30,34 +30,36 @@ /* * Definitions * - * ioctl description - * -------------------------------- -------------------------------------------- - * DKIOCEJECT eject media - * DKIOCSYNCHRONIZECACHE flush media + * ioctl description + * ------------------------------------- --------------------------------------- + * DKIOCEJECT eject media + * DKIOCSYNCHRONIZECACHE flush media * - * DKIOCFORMAT format media - * DKIOCGETFORMATCAPACITIES get media's formattable capacities + * DKIOCFORMAT format media + * DKIOCGETFORMATCAPACITIES get media's formattable capacities * - * DKIOCGETBLOCKSIZE get media's block size - * DKIOCGETBLOCKCOUNT get media's block count - * DKIOCGETFIRMWAREPATH get media's firmware path + * DKIOCGETBLOCKSIZE get media's block size + * DKIOCGETBLOCKCOUNT get media's block count + * DKIOCGETFIRMWAREPATH get media's firmware path * - * DKIOCISFORMATTED is media 
formatted? - * DKIOCISWRITABLE is media writable? + * DKIOCISFORMATTED is media formatted? + * DKIOCISWRITABLE is media writable? * - * DKIOCGETMAXBLOCKCOUNTREAD get maximum block count for reads - * DKIOCGETMAXBLOCKCOUNTWRITE get maximum block count for writes - * DKIOCGETMAXBYTECOUNTREAD get maximum byte count for reads - * DKIOCGETMAXBYTECOUNTWRITE get maximum byte count for writes - * DKIOCGETMAXSEGMENTCOUNTREAD get maximum segment count for reads - * DKIOCGETMAXSEGMENTCOUNTWRITE get maximum segment count for writes - * DKIOCGETMAXSEGMENTBYTECOUNTREAD get maximum segment byte count for reads - * DKIOCGETMAXSEGMENTBYTECOUNTWRITE get maximum segment byte count for writes + * DKIOCGETMAXBLOCKCOUNTREAD get maximum block count for reads + * DKIOCGETMAXBLOCKCOUNTWRITE get maximum block count for writes + * DKIOCGETMAXBYTECOUNTREAD get maximum byte count for reads + * DKIOCGETMAXBYTECOUNTWRITE get maximum byte count for writes + * + * DKIOCGETMAXSEGMENTCOUNTREAD get maximum segment count for reads + * DKIOCGETMAXSEGMENTCOUNTWRITE get maximum segment count for writes + * DKIOCGETMAXSEGMENTBYTECOUNTREAD get maximum segment byte count for reads + * DKIOCGETMAXSEGMENTBYTECOUNTWRITE get maximum segment byte count for writes + * + * DKIOCGETMINSEGMENTALIGNMENTBYTECOUNT get minimum segment alignment in bytes + * DKIOCGETMAXSEGMENTADDRESSABLEBITCOUNT get maximum segment width in bits */ -#if __DARWIN_ALIGN_POWER -#pragma options align=power -#endif +#pragma pack(4) typedef struct { @@ -81,38 +83,40 @@ typedef struct u_int8_t reserved0064[8]; /* reserved, clear to zero */ } dk_format_capacities_t; -#if __DARWIN_ALIGN_POWER -#pragma options align=reset -#endif +#pragma pack() + +#define DKIOCEJECT _IO('d', 21) +#define DKIOCSYNCHRONIZECACHE _IO('d', 22) + +#define DKIOCFORMAT _IOW('d', 26, dk_format_capacity_t) +#define DKIOCGETFORMATCAPACITIES _IOWR('d', 26, dk_format_capacities_t) -#define DKIOCEJECT _IO('d', 21) -#define DKIOCSYNCHRONIZECACHE _IO('d', 22) +#define 
DKIOCGETBLOCKSIZE _IOR('d', 24, u_int32_t) +#define DKIOCGETBLOCKCOUNT _IOR('d', 25, u_int64_t) +#define DKIOCGETFIRMWAREPATH _IOR('d', 28, dk_firmware_path_t) -#define DKIOCFORMAT _IOW('d', 26, dk_format_capacity_t) -#define DKIOCGETFORMATCAPACITIES _IOWR('d', 26, dk_format_capacities_t) +#define DKIOCISFORMATTED _IOR('d', 23, u_int32_t) +#define DKIOCISWRITABLE _IOR('d', 29, u_int32_t) -#define DKIOCGETBLOCKSIZE _IOR('d', 24, u_int32_t) -#define DKIOCGETBLOCKCOUNT _IOR('d', 25, u_int64_t) -#define DKIOCGETFIRMWAREPATH _IOR('d', 28, dk_firmware_path_t) +#define DKIOCGETMAXBLOCKCOUNTREAD _IOR('d', 64, u_int64_t) +#define DKIOCGETMAXBLOCKCOUNTWRITE _IOR('d', 65, u_int64_t) +#define DKIOCGETMAXBYTECOUNTREAD _IOR('d', 70, u_int64_t) +#define DKIOCGETMAXBYTECOUNTWRITE _IOR('d', 71, u_int64_t) -#define DKIOCISFORMATTED _IOR('d', 23, u_int32_t) -#define DKIOCISWRITABLE _IOR('d', 29, u_int32_t) +#define DKIOCGETMAXSEGMENTCOUNTREAD _IOR('d', 66, u_int64_t) +#define DKIOCGETMAXSEGMENTCOUNTWRITE _IOR('d', 67, u_int64_t) +#define DKIOCGETMAXSEGMENTBYTECOUNTREAD _IOR('d', 68, u_int64_t) +#define DKIOCGETMAXSEGMENTBYTECOUNTWRITE _IOR('d', 69, u_int64_t) -#define DKIOCGETMAXBLOCKCOUNTREAD _IOR('d', 64, u_int64_t) -#define DKIOCGETMAXBLOCKCOUNTWRITE _IOR('d', 65, u_int64_t) -#define DKIOCGETMAXBYTECOUNTREAD _IOR('d', 70, u_int64_t) -#define DKIOCGETMAXBYTECOUNTWRITE _IOR('d', 71, u_int64_t) -#define DKIOCGETMAXSEGMENTCOUNTREAD _IOR('d', 66, u_int64_t) -#define DKIOCGETMAXSEGMENTCOUNTWRITE _IOR('d', 67, u_int64_t) -#define DKIOCGETMAXSEGMENTBYTECOUNTREAD _IOR('d', 68, u_int64_t) -#define DKIOCGETMAXSEGMENTBYTECOUNTWRITE _IOR('d', 69, u_int64_t) +#define DKIOCGETMINSEGMENTALIGNMENTBYTECOUNT _IOR('d', 74, u_int64_t) +#define DKIOCGETMAXSEGMENTADDRESSABLEBITCOUNT _IOR('d', 75, u_int64_t) #ifdef KERNEL -#define DKIOCGETBLOCKCOUNT32 _IOR('d', 25, u_int32_t) -#define DKIOCSETBLOCKSIZE _IOW('d', 24, u_int32_t) -#define DKIOCGETBSDUNIT _IOR('d', 27, u_int32_t) -#define DKIOCISVIRTUAL 
_IOR('d', 72, u_int32_t) -#define DKIOCGETBASE _IOR('d', 73, u_int64_t) +#define DKIOCGETBLOCKCOUNT32 _IOR('d', 25, u_int32_t) +#define DKIOCSETBLOCKSIZE _IOW('d', 24, u_int32_t) +#define DKIOCGETBSDUNIT _IOR('d', 27, u_int32_t) +#define DKIOCISVIRTUAL _IOR('d', 72, u_int32_t) +#define DKIOCGETBASE _IOR('d', 73, u_int64_t) #endif /* KERNEL */ #endif /* _SYS_DISK_H_ */ diff --git a/bsd/sys/domain.h b/bsd/sys/domain.h index c55eaeccd..3a5833774 100644 --- a/bsd/sys/domain.h +++ b/bsd/sys/domain.h @@ -78,9 +78,7 @@ struct mbuf; #define DOM_REENTRANT 0x01 -#if __DARWIN_ALIGN_POWER -#pragma options align=power -#endif +#pragma pack(4) struct domain { int dom_family; /* AF_xxx */ @@ -107,9 +105,7 @@ struct domain { u_long reserved[2]; }; -#if __DARWIN_ALIGN_POWER -#pragma options align=reset -#endif +#pragma pack() #ifdef KERNEL extern struct domain *domains; diff --git a/bsd/sys/event.h b/bsd/sys/event.h index 9f8d6c00a..72d71ceca 100644 --- a/bsd/sys/event.h +++ b/bsd/sys/event.h @@ -67,9 +67,7 @@ #define EVFILT_SYSCOUNT 9 #define EVFILT_THREADMARKER EVFILT_SYSCOUNT /* Internal use only */ -#if __DARWIN_ALIGN_POWER -#pragma options align=power -#endif +#pragma pack(4) struct kevent { uintptr_t ident; /* identifier for this event */ @@ -97,9 +95,7 @@ struct user_kevent { #endif -#if __DARWIN_ALIGN_POWER -#pragma options align=reset -#endif +#pragma pack() #define EV_SET(kevp, a, b, c, d, e, f) do { \ struct kevent *__kevp__ = (kevp); \ diff --git a/bsd/sys/fcntl.h b/bsd/sys/fcntl.h index f5f7ba1ee..2ceeb325b 100644 --- a/bsd/sys/fcntl.h +++ b/bsd/sys/fcntl.h @@ -144,6 +144,10 @@ typedef __darwin_pid_t pid_t; #define FWASWRITTEN 0x10000 /* descriptor was written */ #endif +#ifndef _POSIX_C_SOURCE +#define O_DIRECTORY 0x100000 +#endif + /* defined by POSIX 1003.1; BSD default, so no bit required */ #define O_NOCTTY 0 /* don't assign controlling terminal */ //#define O_SYNC /* ??? 
POSIX: Write according to synchronized I/O file integrity completion */ @@ -359,20 +363,12 @@ typedef struct fbootstraptransfer { * WARNING - keep in sync with fbootstraptransfer */ -#if __DARWIN_ALIGN_NATURAL -#pragma options align=natural -#endif - typedef struct user_fbootstraptransfer { off_t fbt_offset; /* IN: offset to start read/write */ user_size_t fbt_length; /* IN: number of bytes to transfer */ user_addr_t fbt_buffer; /* IN: buffer to be read/written */ } user_fbootstraptransfer_t; -#if __DARWIN_ALIGN_NATURAL -#pragma options align=reset -#endif - #endif // KERNEL /* @@ -391,9 +387,7 @@ typedef struct user_fbootstraptransfer { * and a per filesystem type flag will be needed to interpret the * contiguous bytes count result from CMAP. */ -#if __DARWIN_ALIGN_POWER -#pragma options align=power -#endif +#pragma pack(4) struct log2phys { unsigned int l2p_flags; /* unused so far */ @@ -401,9 +395,7 @@ struct log2phys { off_t l2p_devoffset; /* bytes into device */ }; -#if __DARWIN_ALIGN_POWER -#pragma options align=reset -#endif +#pragma pack() #define O_POPUP 0x80000000 /* force window to popup on open */ #define O_ALERT 0x20000000 /* small, clean popup window */ diff --git a/bsd/sys/file.h b/bsd/sys/file.h index 710159af8..492d64aca 100644 --- a/bsd/sys/file.h +++ b/bsd/sys/file.h @@ -70,9 +70,7 @@ #include #endif -#if __DARWIN_ALIGN_POWER -#pragma options align=power -#endif +#pragma pack(4) /* for the compat sake; */ struct extern_file { @@ -87,9 +85,7 @@ struct extern_file { caddr_t f_data; /* vnode or socket or SHM or semaphore */ }; -#if __DARWIN_ALIGN_POWER -#pragma options align=reset -#endif +#pragma pack() #ifdef KERNEL __BEGIN_DECLS diff --git a/bsd/sys/file_internal.h b/bsd/sys/file_internal.h index 4656bab00..f9a57d390 100644 --- a/bsd/sys/file_internal.h +++ b/bsd/sys/file_internal.h @@ -191,8 +191,15 @@ struct kqueue; int fp_getfkq(struct proc *p, int fd, struct fileproc **resultfp, struct kqueue **resultkq); struct psemnode; int 
fp_getfpsem(struct proc *p, int fd, struct fileproc **resultfp, struct psemnode **resultpsem); +struct pshmnode; +int fp_getfpshm(struct proc *p, int fd, struct fileproc **resultfp, struct pshmnode **resultpshm); +struct pipe; +int fp_getfpipe(struct proc *p, int fd, struct fileproc **resultfp, struct pipe **resultpipe); +struct atalk; +int fp_getfatalk(struct proc *p, int fd, struct fileproc **resultfp, struct atalk **resultatalk); struct vnode; int fp_getfvp(struct proc *p, int fd, struct fileproc **resultfp, struct vnode **resultvp); +int fp_getfvpandvid(struct proc *p, int fd, struct fileproc **resultfp, struct vnode **resultvp, uint32_t * vidp); struct socket; int fp_getfsock(struct proc *p, int fd, struct fileproc **resultfp, struct socket **results); int fp_lookup(struct proc *p, int fd, struct fileproc **resultfp, int locked); diff --git a/bsd/sys/imgact.h b/bsd/sys/imgact.h index 7a6920171..58deac06c 100644 --- a/bsd/sys/imgact.h +++ b/bsd/sys/imgact.h @@ -79,7 +79,7 @@ struct image_params { user_size_t ip_arch_size; /* subfile length in ip_vp */ char ip_interp_name[IMG_SHSIZE]; /* interpreter name */ - /* Next two fields are for support of Classic... */ + /* Next two fields are for support of architecture translation... 
*/ char *ip_p_comm; /* optional alt p->p_comm */ char *ip_tws_cache_name; /* task working set cache */ struct vfs_context *ip_vfs_context; /* VFS context */ @@ -92,7 +92,11 @@ struct image_params { */ #define IMGPF_NONE 0x00000000 /* No flags */ #define IMGPF_INTERPRET 0x00000001 /* Interpreter invoked */ +#if defined (__i386__) || defined(__x86_64__) +#define IMGPF_POWERPC 0x00000002 /* ppc mode */ +#else #define IMGPF_RESERVED1 0x00000002 /* reserved */ +#endif #define IMGPF_WAS_64BIT 0x00000004 /* exec from a 64Bit binary */ #define IMGPF_IS_64BIT 0x00000008 /* exec to a 64Bit binary */ diff --git a/bsd/sys/ipcs.h b/bsd/sys/ipcs.h index e4a6e23f4..7592dc951 100644 --- a/bsd/sys/ipcs.h +++ b/bsd/sys/ipcs.h @@ -53,10 +53,6 @@ struct IPCS_command { #ifdef KERNEL_PRIVATE #include -#if __DARWIN_ALIGN_NATURAL -#pragma options align=natural -#endif - struct user_IPCS_command { int ipcs_magic; /* Magic number for struct layout */ int ipcs_op; /* Operation to perform */ @@ -65,10 +61,6 @@ struct user_IPCS_command { user_addr_t ipcs_data; /* OP specific data */ }; -#if __DARWIN_ALIGN_NATURAL -#pragma options align=reset -#endif - #endif /* KERNEL_PRIVATE */ /* diff --git a/bsd/sys/kauth.h b/bsd/sys/kauth.h index eb87187e9..33351756b 100644 --- a/bsd/sys/kauth.h +++ b/bsd/sys/kauth.h @@ -355,10 +355,15 @@ typedef struct kauth_filesec *kauth_filesec_t; #define KAUTH_FILESEC_XATTR "com.apple.system.Security" +/* Allowable first arguments to kauth_filesec_acl_setendian() */ +#define KAUTH_ENDIAN_HOST 0x00000001 /* set host endianness */ +#define KAUTH_ENDIAN_DISK 0x00000002 /* set disk endianness */ + __BEGIN_DECLS kauth_filesec_t kauth_filesec_alloc(int size); void kauth_filesec_free(kauth_filesec_t fsp); int kauth_copyinfilesec(user_addr_t xsecurity, kauth_filesec_t *xsecdestpp); + void kauth_filesec_acl_setendian(int, kauth_filesec_t, kauth_acl_t); __END_DECLS #endif /* KERNEL || */ diff --git a/bsd/sys/kdebug.h b/bsd/sys/kdebug.h index 28f6456c7..ee7e6b2d3 100644 --- 
a/bsd/sys/kdebug.h +++ b/bsd/sys/kdebug.h @@ -67,29 +67,31 @@ __BEGIN_DECLS /* The Kernel Debug Classes */ -#define DBG_MACH 1 -#define DBG_NETWORK 2 -#define DBG_FSYSTEM 3 -#define DBG_BSD 4 -#define DBG_IOKIT 5 -#define DBG_DRIVERS 6 -#define DBG_TRACE 7 +#define DBG_MACH 1 +#define DBG_NETWORK 2 +#define DBG_FSYSTEM 3 +#define DBG_BSD 4 +#define DBG_IOKIT 5 +#define DBG_DRIVERS 6 +#define DBG_TRACE 7 #define DBG_DLIL 8 #define DBG_SECURITY 9 -#define DBG_MISC 20 -#define DBG_DYLD 31 -#define DBG_QT 32 -#define DBG_APPS 33 -#define DBG_MIG 255 +#define DBG_MISC 20 +#define DBG_DYLD 31 +#define DBG_QT 32 +#define DBG_APPS 33 +#define DBG_MIG 255 /* **** The Kernel Debug Sub Classes for Mach (DBG_MACH) **** */ +#define DBG_MACH_EXCP_KTRAP_x86 0x02 /* Kernel Traps on x86 */ #define DBG_MACH_EXCP_DFLT 0x03 /* Data Translation Fault */ #define DBG_MACH_EXCP_IFLT 0x04 /* Inst Translation Fault */ #define DBG_MACH_EXCP_INTR 0x05 /* Interrupts */ #define DBG_MACH_EXCP_ALNG 0x06 /* Alignment Exception */ -#define DBG_MACH_EXCP_TRAP 0x07 /* Traps */ +#define DBG_MACH_EXCP_UTRAP_x86 0x07 /* User Traps on x86 */ #define DBG_MACH_EXCP_FP 0x08 /* FP Unavail */ #define DBG_MACH_EXCP_DECI 0x09 /* Decrementer Interrupt */ +#define DBG_MACH_CHUD 0x0A /* CHUD */ #define DBG_MACH_EXCP_SC 0x0C /* System Calls */ #define DBG_MACH_EXCP_TRACE 0x0D /* Trace exception */ #define DBG_MACH_EXCP_EMUL 0x0E /* Instruction emulated */ @@ -139,33 +141,59 @@ __BEGIN_DECLS #define DBG_NETIPSEC 128 /* IPsec Protocol */ /* **** The Kernel Debug Sub Classes for IOKIT (DBG_IOKIT) **** */ -#define DBG_IOSCSI 1 /* SCSI */ -#define DBG_IODISK 2 /* Disk layers */ -#define DBG_IONETWORK 3 /* Network layers */ -#define DBG_IOKEYBOARD 4 /* Keyboard */ -#define DBG_IOPOINTING 5 /* Pointing Devices */ -#define DBG_IOAUDIO 6 /* Audio */ -#define DBG_IOFLOPPY 7 /* Floppy */ -#define DBG_IOSERIAL 8 /* Serial */ -#define DBG_IOTTY 9 /* TTY layers */ -#define DBG_IOWORKLOOP 10 /* Work from work loop */ -#define 
DBG_IOINTES 11 /* Interrupt event source */ -#define DBG_IOCLKES 12 /* Clock event source */ -#define DBG_IOCMDQ 13 /* Command queue latencies */ -#define DBG_IOMCURS 14 /* Memory Cursor */ -#define DBG_IOMDESC 15 /* Memory Descriptors */ -#define DBG_IOPOWER 16 /* Power Managerment */ +#define DBG_IOWORKLOOP 1 /* Work from work loop */ +#define DBG_IOINTES 2 /* Interrupt event source */ +#define DBG_IOCLKES 3 /* Clock event source */ +#define DBG_IOCMDQ 4 /* Command queue latencies */ +#define DBG_IOMCURS 5 /* Memory Cursor */ +#define DBG_IOMDESC 6 /* Memory Descriptors */ +#define DBG_IOPOWER 7 /* Power Management */ + +/* **** 8-31 reserved for internal IOKit usage **** */ + +#define DBG_IOSTORAGE 32 /* Storage layers */ +#define DBG_IONETWORK 33 /* Network layers */ +#define DBG_IOKEYBOARD 34 /* Keyboard */ +#define DBG_IOHID 35 /* HID Devices */ +#define DBG_IOAUDIO 36 /* Audio */ +#define DBG_IOSERIAL 37 /* Serial */ +#define DBG_IOTTY 38 /* TTY layers */ +#define DBG_IOSAM 39 /* SCSI Architecture Model layers */ +#define DBG_IOPARALLELATA 40 /* Parallel ATA */ +#define DBG_IOPARALLELSCSI 41 /* Parallel SCSI */ +#define DBG_IOSATA 42 /* Serial-ATA */ +#define DBG_IOSAS 43 /* SAS */ +#define DBG_IOFIBRECHANNEL 44 /* FiberChannel */ +#define DBG_IOUSB 45 /* USB */ +#define DBG_IOBLUETOOTH 46 /* Bluetooth */ +#define DBG_IOFIREWIRE 47 /* FireWire */ +#define DBG_IOINFINIBAND 48 /* Infiniband */ + +/* Backwards compatibility */ +#define DBG_IOPOINTING DBG_IOHID /* OBSOLETE: Use DBG_IOHID instead */ +#define DBG_IODISK DBG_IOSTORAGE /* OBSOLETE: Use DBG_IOSTORAGE instead */ /* **** The Kernel Debug Sub Classes for Device Drivers (DBG_DRIVERS) **** */ -#define DBG_DRVSCSI 1 /* SCSI */ -#define DBG_DRVDISK 2 /* Disk layers */ -#define DBG_DRVNETWORK 3 /* Network layers */ -#define DBG_DRVKEYBOARD 4 /* Keyboard */ -#define DBG_DRVPOINTING 5 /* Pointing Devices */ -#define DBG_DRVAUDIO 6 /* Audio */ -#define DBG_DRVFLOPPY 7 /* Floppy */ -#define DBG_DRVSERIAL 8 /*
Serial */ -#define DBG_DRVSPLT 9 +#define DBG_DRVSTORAGE 1 /* Storage layers */ +#define DBG_DRVNETWORK 2 /* Network layers */ +#define DBG_DRVKEYBOARD 3 /* Keyboard */ +#define DBG_DRVHID 4 /* HID Devices */ +#define DBG_DRVAUDIO 5 /* Audio */ +#define DBG_DRVSERIAL 7 /* Serial */ +#define DBG_DRVSAM 8 /* SCSI Architecture Model layers */ +#define DBG_DRVPARALLELATA 9 /* Parallel ATA */ +#define DBG_DRVPARALLELSCSI 10 /* Parallel SCSI */ +#define DBG_DRVSATA 11 /* Serial ATA */ +#define DBG_DRVSAS 12 /* SAS */ +#define DBG_DRVFIBRECHANNEL 13 /* FiberChannel */ +#define DBG_DRVUSB 14 /* USB */ +#define DBG_DRVBLUETOOTH 15 /* Bluetooth */ +#define DBG_DRVFIREWIRE 16 /* FireWire */ +#define DBG_DRVINFINIBAND 17 /* Infiniband */ + +/* Backwards compatibility */ +#define DBG_DRVPOINTING DBG_DRVHID /* OBSOLETE: Use DBG_DRVHID instead */ +#define DBG_DRVDISK DBG_DRVSTORAGE /* OBSOLETE: Use DBG_DRVSTORAGE instead */ /* **** The Kernel Debug Sub Classes for the DLIL Layer (DBG_DLIL) **** */ #define DBG_DLIL_STATIC 1 /* Static DLIL code */ @@ -189,6 +217,10 @@ __BEGIN_DECLS #define DBG_TRACE_DATA 0 #define DBG_TRACE_STRING 1 +/* The Kernel Debug Sub Classes for DBG_MISC */ +#define DBG_EVENT 0x10 +#define DBG_BUFFER 0x20 + /* The Kernel Debug Sub Classes for DBG_DYLD */ #define DBG_DYLD_STRING 5 @@ -270,6 +302,7 @@ extern void kernel_debug1(unsigned int debugid, unsigned int arg1, unsigned int /* * LP64todo - for some reason these are problematic */ +struct proc; extern void kdbg_trace_data(struct proc *proc, long *arg_pid); extern void kdbg_trace_string(struct proc *proc, long *arg1, long *arg2, long *arg3, long *arg4); diff --git a/bsd/sys/kernel_types.h b/bsd/sys/kernel_types.h index 89eb0ed0b..c4d9a60be 100644 --- a/bsd/sys/kernel_types.h +++ b/bsd/sys/kernel_types.h @@ -33,7 +33,7 @@ typedef struct mount * mount_t; #ifdef TBDDDD typedef struct fsid { int32_t val[2]; } fsid_t; /* file system id type */ -#endif TBDDDD +#endif /* TBDDDD */ struct vnode; typedef struct 
vnode * vnode_t; diff --git a/bsd/sys/kpi_socket.h b/bsd/sys/kpi_socket.h index 13c56414f..452264dcd 100644 --- a/bsd/sys/kpi_socket.h +++ b/bsd/sys/kpi_socket.h @@ -33,6 +33,7 @@ #include #include +#include struct timeval; diff --git a/bsd/sys/ktrace.h b/bsd/sys/ktrace.h index f07a9a8d1..61787233d 100644 --- a/bsd/sys/ktrace.h +++ b/bsd/sys/ktrace.h @@ -63,8 +63,8 @@ #ifdef MACH_KERNEL_PRIVATE -void ktrsyscall(void *, int, int, u_int64_t *, int); -void ktrsysret(void *, int, int, int, int); +void ktrsyscall(void *, int, int, void *); +void ktrsysret(void *, int, int, int); #else #ifdef __APPLE_API_UNSTABLE @@ -197,7 +197,7 @@ void ktrnamei(struct vnode *,char *); void ktrcsw(struct vnode *, int, int); void ktrpsig(struct vnode *, int, sig_t, sigset_t *, int); void ktrgenio(struct vnode *, int, enum uio_rw, struct uio *, int); -void ktrsyscall(struct proc *, int, int, u_int64_t args[]); +void ktrsyscall(struct proc *, int, int, syscall_arg_t args[]); void ktrsysret(struct proc *, int, int, register_t); #endif /* __APPLE_API_PRIVATE */ #else diff --git a/bsd/sys/lock.h b/bsd/sys/lock.h index 5364f66e1..ec098b61a 100644 --- a/bsd/sys/lock.h +++ b/bsd/sys/lock.h @@ -70,13 +70,6 @@ #include - -#if defined(thread_sleep_simple_lock) -#undef thread_sleep_simple_lock -#endif -#define thread_sleep_simple_lock(l, e, i) thread_sleep_funnel((e), (i)) - - #endif /* KERNEL */ #ifdef BSD_KERNEL_PRIVATE @@ -105,10 +98,6 @@ struct lock__bsd__ { * WARNING - keep in sync with lock__bsd__ */ -#if __DARWIN_ALIGN_NATURAL -#pragma options align=natural -#endif - struct user_lock__bsd__ { user_addr_t lk_interlock[10]; /* lock on remaining fields */ u_int lk_flags; /* see below */ @@ -122,10 +111,6 @@ struct user_lock__bsd__ { user_addr_t lk_lockthread; /* thread which acquired excl lock */ }; -#if __DARWIN_ALIGN_NATURAL -#pragma options align=reset -#endif - /* * Lock request types: * LK_SHARED - get one of many possible shared locks. 
If a process diff --git a/bsd/sys/lockf.h b/bsd/sys/lockf.h index 7c3e814d0..1c4e21a95 100644 --- a/bsd/sys/lockf.h +++ b/bsd/sys/lockf.h @@ -75,9 +75,7 @@ MALLOC_DECLARE(M_LOCKF); */ TAILQ_HEAD(locklist, lockf); -#if __DARWIN_ALIGN_POWER -#pragma options align=power -#endif +#pragma pack(4) struct lockf { short lf_flags; /* Semantics: F_POSIX, F_FLOCK, F_WAIT */ @@ -92,9 +90,7 @@ struct lockf { TAILQ_ENTRY(lockf) lf_block;/* A request waiting for a lock */ }; -#if __DARWIN_ALIGN_POWER -#pragma options align=reset -#endif +#pragma pack() /* Maximum length of sleep chains to traverse to try and detect deadlock. */ #define MAXDEPTH 50 diff --git a/bsd/sys/mount.h b/bsd/sys/mount.h index 58d5cc0ea..e2078d904 100644 --- a/bsd/sys/mount.h +++ b/bsd/sys/mount.h @@ -113,9 +113,7 @@ struct statfs { #define MFSTYPENAMELEN 16 /* length of fs type name including null */ -#if __DARWIN_ALIGN_POWER -#pragma options align=power -#endif +#pragma pack(4) struct vfsstatfs { uint32_t f_bsize; /* fundamental file system block size */ @@ -136,9 +134,7 @@ struct vfsstatfs { void *f_reserved[2]; /* For future use == 0 */ }; -#if __DARWIN_ALIGN_POWER -#pragma options align=reset -#endif +#pragma pack() #define VFSATTR_INIT(s) ((s)->f_supported = (s)->f_active = 0LL) #define VFSATTR_SET_SUPPORTED(s, a) ((s)->f_supported |= VFSATTR_ ## a) @@ -177,9 +173,7 @@ struct vfsstatfs { /* * New VFS_STAT argument structure. */ -#if __DARWIN_ALIGN_POWER -#pragma options align=power -#endif +#pragma pack(4) struct vfs_attr { uint64_t f_supported; @@ -217,9 +211,7 @@ struct vfs_attr { uint16_t f_carbon_fsid; /* same as Carbon's FSVolumeInfo.filesystemID */ }; -#if __DARWIN_ALIGN_POWER -#pragma options align=reset -#endif +#pragma pack() /* * User specifiable flags. @@ -344,32 +336,24 @@ struct vfsidctl { * grow when we're dealing with a 64-bit process. 
* WARNING - keep in sync with vfsconf */ -#if __DARWIN_ALIGN_NATURAL -#pragma options align=natural -#endif - struct user_vfsconf { user_addr_t vfc_vfsops; /* filesystem operations vector */ char vfc_name[MFSNAMELEN]; /* filesystem type name */ int vfc_typenum; /* historic filesystem type number */ int vfc_refcount; /* number mounted of this type */ int vfc_flags; /* permanent flags */ - user_addr_t vfc_mountroot; /* if != NULL, routine to mount root */ + user_addr_t vfc_mountroot __attribute((aligned(8))); /* if != NULL, routine to mount root */ user_addr_t vfc_next; /* next in list */ }; struct user_vfsidctl { int vc_vers; /* should be VFSIDCTL_VERS1 (below) */ fsid_t vc_fsid; /* fsid to operate on. */ - user_addr_t vc_ptr; /* pointer to data structure. */ + user_addr_t vc_ptr __attribute((aligned(8))); /* pointer to data structure. */ user_size_t vc_len; /* sizeof said structure. */ u_int32_t vc_spare[12]; /* spare (must be zero). */ }; -#if __DARWIN_ALIGN_NATURAL -#pragma options align=reset -#endif - #endif /* KERNEL */ /* diff --git a/bsd/sys/mount_internal.h b/bsd/sys/mount_internal.h index 8eacce4ea..c8a837ea0 100644 --- a/bsd/sys/mount_internal.h +++ b/bsd/sys/mount_internal.h @@ -226,14 +226,10 @@ struct vfsmount_args { * LP64 version of statfs structure. 
* NOTE - must be kept in sync with struct statfs in mount.h */ -#if __DARWIN_ALIGN_NATURAL -#pragma options align=natural -#endif - struct user_statfs { short f_otype; /* TEMPORARY SHADOW COPY OF f_type */ short f_oflags; /* TEMPORARY SHADOW COPY OF f_flags */ - user_long_t f_bsize; /* fundamental file system block size */ + user_long_t f_bsize __attribute((aligned(8))); /* fundamental file system block size */ user_long_t f_iosize; /* optimal transfer block size */ user_long_t f_blocks; /* total data blocks in file system */ user_long_t f_bfree; /* free blocks in fs */ @@ -254,14 +250,10 @@ struct user_statfs { user_long_t f_reserved4[0]; /* For future use */ #else char f_reserved3; /* For alignment */ - user_long_t f_reserved4[4]; /* For future use */ + user_long_t f_reserved4[4] __attribute((aligned(8))); /* For future use */ #endif }; -#if __DARWIN_ALIGN_NATURAL -#pragma options align=reset -#endif - __BEGIN_DECLS extern TAILQ_HEAD(mntlist, mount) mountlist; diff --git a/bsd/sys/namei.h b/bsd/sys/namei.h index 775a41030..f588c1b05 100644 --- a/bsd/sys/namei.h +++ b/bsd/sys/namei.h @@ -209,7 +209,6 @@ int reverse_lookup(vnode_t start_vp, vnode_t *lookup_vpp, * Stats on usefulness of namei caches. */ struct nchstats { - long ncs_negtotal; long ncs_goodhits; /* hits that we can really use */ long ncs_neghits; /* negative hits that we can use */ long ncs_badhits; /* hits we must drop */ diff --git a/bsd/sys/proc.h b/bsd/sys/proc.h index cbf1b3a80..161b7b1f3 100644 --- a/bsd/sys/proc.h +++ b/bsd/sys/proc.h @@ -177,7 +177,8 @@ struct extern_proc { #define P_OWEUPC 0x00008000 /* Owe process an addupc() call at next ast. */ #define P_AFFINITY 0x00010000 /* xxx */ -#define P_CLASSIC 0x00020000 /* xxx */ +#define P_TRANSLATED 0x00020000 /* xxx */ +#define P_CLASSIC P_TRANSLATED /* xxx */ /* #define P_FSTRACE 0x10000 / * tracing via file system (elsewhere?) * / #define P_SSTEP 0x20000 / * process needs single-step fixup ??? 
* / diff --git a/bsd/sys/proc_info.h b/bsd/sys/proc_info.h new file mode 100644 index 000000000..956a0684b --- /dev/null +++ b/bsd/sys/proc_info.h @@ -0,0 +1,568 @@ +/* + * Copyright (c) 2005 Apple Computer, Inc. All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * The contents of this file constitute Original Code as defined in and + * are subject to the Apple Public Source License Version 1.1 (the + * "License"). You may not use this file except in compliance with the + * License. Please obtain a copy of the License at + * http://www.apple.com/publicsource and read it before using this file. + * + * This Original Code and all software distributed under the License are + * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the + * License for the specific language governing rights and limitations + * under the License. 
+ * + * @APPLE_LICENSE_HEADER_END@ + */ + +#ifndef _SYS_PROC_INFO_H +#define _SYS_PROC_INFO_H + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +__BEGIN_DECLS + + +#define PROC_ALL_PIDS 1 +#define PROC_PGRP_ONLY 2 +#define PROC_TTY_ONLY 3 +#define PROC_UID_ONLY 4 +#define PROC_RUID_ONLY 5 + +struct proc_bsdinfo { + uint32_t pbi_flags; /* 64bit; emulated etc */ + uint32_t pbi_status; + uint32_t pbi_xstatus; + uint32_t pbi_pid; + uint32_t pbi_ppid; + uid_t pbi_uid; + gid_t pbi_gid; + uid_t pbi_ruid; + gid_t pbi_rgid; + uid_t pbi_svuid; + gid_t pbi_svgid; + char pbi_comm[MAXCOMLEN + 1]; + char pbi_name[2*MAXCOMLEN + 1]; /* empty if no name is registered */ + uint32_t pbi_nfiles; + uint32_t pbi_pgid; + uint32_t pbi_pjobc; + uint32_t e_tdev; /* controlling tty dev */ + uint32_t e_tpgid; /* tty process group id */ + struct timeval pbi_start; + int32_t pbi_nice; +}; + + + +/* pbi_flags values */ +#define PROC_FLAG_SYSTEM 1 +#define PROC_FLAG_TRACED 2 +#define PROC_FLAG_INEXIT 4 +#define PROC_FLAG_PPWAIT 8 +#define PROC_FLAG_LP64 0x10 +#define PROC_FLAG_SLEADER 0x20 +#define PROC_FLAG_CTTY 0x40 +#define PROC_FLAG_CONTROLT 0x80 + + +struct proc_taskinfo { + uint64_t pti_virtual_size; /* virtual memory size (bytes) */ + uint64_t pti_resident_size; /* resident memory size (bytes) */ + uint64_t pti_total_user; /* total time */ + uint64_t pti_total_system; + uint64_t pti_threads_user; /* existing threads only */ + uint64_t pti_threads_system; + int32_t pti_policy; /* default policy for new threads */ + int32_t pti_faults; /* number of page faults */ + int32_t pti_pageins; /* number of actual pageins */ + int32_t pti_cow_faults; /* number of copy-on-write faults */ + int32_t pti_messages_sent; /* number of messages sent */ + int32_t pti_messages_received; /* number of messages received */ + int32_t pti_syscalls_mach; /* number of mach system calls */ + int32_t pti_syscalls_unix; /* number of unix system 
calls */ + int32_t pti_csw; /* number of context switches */ + int32_t pti_threadnum; /* number of threads in the task */ + int32_t pti_numrunning; /* number of running threads */ + int32_t pti_priority; /* task priority */ +}; + +struct proc_taskallinfo { + struct proc_bsdinfo pbsd; + struct proc_taskinfo ptinfo; +}; + + +struct proc_threadinfo { + uint64_t pth_user_time; /* user run time */ + uint64_t pth_system_time; /* system run time */ + int32_t pth_cpu_usage; /* scaled cpu usage percentage */ + int32_t pth_policy; /* scheduling policy in effect */ + int32_t pth_run_state; /* run state (see below) */ + int32_t pth_flags; /* various flags (see below) */ + int32_t pth_sleep_time; /* number of seconds that thread has been sleeping */ + int32_t pth_curpri; /* cur priority */ + int32_t pth_priority; /* priority */ + int32_t pth_maxpriority; /* max priority */ +}; + +struct proc_regioninfo { + uint32_t pri_protection; + uint32_t pri_max_protection; + uint32_t pri_inheritance; + uint32_t pri_flags; /* shared, external pager, is submap */ + uint64_t pri_offset; + uint32_t pri_behavior; + uint32_t pri_user_wired_count; + uint32_t pri_user_tag; + uint32_t pri_pages_resident; + uint32_t pri_pages_shared_now_private; + uint32_t pri_pages_swapped_out; + uint32_t pri_pages_dirtied; + uint32_t pri_ref_count; + uint32_t pri_shadow_depth; + uint32_t pri_share_mode; + uint32_t pri_private_pages_resident; + uint32_t pri_shared_pages_resident; + uint32_t pri_obj_id; + uint64_t pri_address; + uint64_t pri_size; + uint32_t pri_depth; +}; + +#define PROC_REGION_SUBMAP 1 +#define PROC_REGION_SHARED 2 + +#define SM_COW 1 +#define SM_PRIVATE 2 +#define SM_EMPTY 3 +#define SM_SHARED 4 +#define SM_TRUESHARED 5 +#define SM_PRIVATE_ALIASED 6 +#define SM_SHARED_ALIASED 7 + + +/* + * Thread run states (state field).
+ */ + +#define TH_STATE_RUNNING 1 /* thread is running normally */ +#define TH_STATE_STOPPED 2 /* thread is stopped */ +#define TH_STATE_WAITING 3 /* thread is waiting normally */ +#define TH_STATE_UNINTERRUPTIBLE 4 /* thread is in an uninterruptible + wait */ +#define TH_STATE_HALTED 5 /* thread is halted at a + clean point */ + +/* + * Thread flags (flags field). + */ +#define TH_FLAGS_SWAPPED 0x1 /* thread is swapped out */ +#define TH_FLAGS_IDLE 0x2 /* thread is an idle thread */ + + + +struct proc_fileinfo { + uint32_t fi_openflags; + uint32_t fi_status; + off_t fi_offset; + int32_t fi_type; +}; + + +struct vnode_info { + struct stat vi_stat; + int vi_type; + fsid_t vi_fsid; +}; + +struct vnode_info_path { + struct vnode_info vip_vi; + char vip_path[MAXPATHLEN]; /* tail end of it */ +}; + +struct vnode_fdinfo { + struct proc_fileinfo pfi; + struct vnode_info pvi; +}; + +struct vnode_fdinfowithpath { + struct proc_fileinfo pfi; + struct vnode_info_path pvip; + +}; + + +struct proc_regionwithpathinfo { + struct proc_regioninfo prp_prinfo; + struct vnode_info_path prp_vip; +}; + +struct proc_vnodepathinfo { + struct vnode_info_path pvi_cdir; + struct vnode_info_path pvi_rdir; +}; + + +/* + * Socket + */ + + +/* + * IPv4 and IPv6 Sockets + */ + +#define INI_IPV4 0x1 +#define INI_IPV6 0x2 + +struct in4in6_addr { + u_int32_t i46a_pad32[3]; + struct in_addr i46a_addr4; +}; + +struct in_sockinfo { + int insi_fport; /* foreign port */ + int insi_lport; /* local port */ + uint64_t insi_gencnt; /* generation count of this instance */ + uint32_t insi_flags; /* generic IP/datagram flags */ + uint32_t insi_flow; + + uint8_t insi_vflag; /* INI_IPV4 or INI_IPV6 */ + uint8_t insi_ip_ttl; /* time to live proto */ + /* protocol dependent part */ + union { + struct in4in6_addr ina_46; + struct in6_addr ina_6; + } insi_faddr; /* foreign host table entry */ + union { + struct in4in6_addr ina_46; + struct in6_addr ina_6; + } insi_laddr; /* local host table entry */ + struct { +
u_char in4_tos; /* type of service */ + } insi_v4; + struct { + uint8_t in6_hlim; + int in6_cksum; + u_short in6_ifindex; + short in6_hops; + } insi_v6; +}; + +/* + * TCP Sockets + */ + +#define TSI_T_REXMT 0 /* retransmit */ +#define TSI_T_PERSIST 1 /* retransmit persistence */ +#define TSI_T_KEEP 2 /* keep alive */ +#define TSI_T_2MSL 3 /* 2*msl quiet time timer */ +#define TSI_T_NTIMERS 4 + +#define TSI_S_CLOSED 0 /* closed */ +#define TSI_S_LISTEN 1 /* listening for connection */ +#define TSI_S_SYN_SENT 2 /* active, have sent syn */ +#define TSI_S_SYN_RECEIVED 3 /* have sent and received syn */ +#define TSI_S_ESTABLISHED 4 /* established */ +#define TSI_S__CLOSE_WAIT 5 /* rcvd fin, waiting for close */ +#define TSI_S_FIN_WAIT_1 6 /* have closed, sent fin */ +#define TSI_S_CLOSING 7 /* closed xchd FIN; await FIN ACK */ +#define TSI_S_LAST_ACK 8 /* had fin and close; await FIN ACK */ +#define TSI_S_FIN_WAIT_2 9 /* have closed, fin is acked */ +#define TSI_S_TIME_WAIT 10 /* in 2*msl quiet wait after close */ +#define TSI_S_RESERVED 11 /* pseudo state: reserved */ + +struct tcp_sockinfo { + struct in_sockinfo tcpsi_ini; + int tcpsi_state; + int tcpsi_timer[TSI_T_NTIMERS]; + int tcpsi_mss; + uint32_t tcpsi_flags; + uint64_t tcpsi_tp; /* opaque handle of TCP protocol control block */ +}; + +/* + * Unix Domain Sockets + */ + + +struct un_sockinfo { + uint64_t unsi_conn_so; /* opaque handle of connected socket */ + uint64_t unsi_conn_pcb; /* opaque handle of connected protocol control block */ + union { + struct sockaddr_un ua_sun; + char ua_dummy[SOCK_MAXADDRLEN]; + } unsi_addr; /* bound address */ + union { + struct sockaddr_un ua_sun; + char ua_dummy[SOCK_MAXADDRLEN]; + } unsi_caddr; /* address of socket connected to */ +}; + +/* + * PF_NDRV Sockets + */ + +struct ndrv_info { + uint32_t ndrvsi_if_family; + uint32_t ndrvsi_if_unit; + char ndrvsi_if_name[IF_NAMESIZE]; +}; + +/* + * Kernel Event Sockets + */ + +struct kern_event_info { + uint32_t
kesi_vendor_code_filter; + uint32_t kesi_class_filter; + uint32_t kesi_subclass_filter; +}; + +/* + * Kernel Control Sockets + */ + +struct kern_ctl_info { + uint32_t kcsi_id; + uint32_t kcsi_reg_unit; + uint32_t kcsi_flags; /* support flags */ + uint32_t kcsi_recvbufsize; /* request more than the default buffer size */ + uint32_t kcsi_sendbufsize; /* request more than the default buffer size */ + uint32_t kcsi_unit; + char kcsi_name[MAX_KCTL_NAME]; /* unique nke identifier, provided by DTS */ +}; + +/* soi_state */ + +#define SOI_S_NOFDREF 0x0001 /* no file table ref any more */ +#define SOI_S_ISCONNECTED 0x0002 /* socket connected to a peer */ +#define SOI_S_ISCONNECTING 0x0004 /* in process of connecting to peer */ +#define SOI_S_ISDISCONNECTING 0x0008 /* in process of disconnecting */ +#define SOI_S_CANTSENDMORE 0x0010 /* can't send more data to peer */ +#define SOI_S_CANTRCVMORE 0x0020 /* can't receive more data from peer */ +#define SOI_S_RCVATMARK 0x0040 /* at mark on input */ +#define SOI_S_PRIV 0x0080 /* privileged for broadcast, raw... 
*/ +#define SOI_S_NBIO 0x0100 /* non-blocking ops */ +#define SOI_S_ASYNC 0x0200 /* async i/o notify */ +#define SOI_S_INCOMP 0x0800 /* Unaccepted, incomplete connection */ +#define SOI_S_COMP 0x1000 /* unaccepted, complete connection */ +#define SOI_S_ISDISCONNECTED 0x2000 /* socket disconnected from peer */ +#define SOI_S_DRAINING 0x4000 /* close waiting for blocked system calls to drain */ + +struct sockbuf_info { + uint32_t sbi_cc; + uint32_t sbi_hiwat; /* SO_RCVBUF, SO_SNDBUF */ + uint32_t sbi_mbcnt; + uint32_t sbi_mbmax; + uint32_t sbi_lowat; + short sbi_flags; + short sbi_timeo; +}; + +enum { + SOCKINFO_GENERIC = 0, + SOCKINFO_IN = 1, + SOCKINFO_TCP = 2, + SOCKINFO_UN = 3, + SOCKINFO_NDRV = 4, + SOCKINFO_KERN_EVENT = 5, + SOCKINFO_KERN_CTL = 6 +}; + +struct socket_info { + struct stat soi_stat; + uint64_t soi_so; /* opaque handle of socket */ + uint64_t soi_pcb; /* opaque handle of protocol control block */ + int soi_type; + int soi_protocol; + int soi_family; + short soi_options; + short soi_linger; + short soi_state; + short soi_qlen; + short soi_incqlen; + short soi_qlimit; + short soi_timeo; + u_short soi_error; + uint32_t soi_oobmark; + struct sockbuf_info soi_rcv; + struct sockbuf_info soi_snd; + int soi_kind; + union { + struct in_sockinfo pri_in; /* SOCKINFO_IN */ + struct tcp_sockinfo pri_tcp; /* SOCKINFO_TCP */ + struct un_sockinfo pri_un; /* SOCKINFO_UN */ + struct ndrv_info pri_ndrv; /* SOCKINFO_NDRV */ + struct kern_event_info pri_kern_event; /* SOCKINFO_KERN_EVENT */ + struct kern_ctl_info pri_kern_ctl; /* SOCKINFO_KERN_CTL */ + } soi_proto; +}; + +struct socket_fdinfo { + struct proc_fileinfo pfi; + struct socket_info psi; +}; + + + +struct psem_info { + struct stat psem_stat; + char psem_name[MAXPATHLEN]; +}; + +struct psem_fdinfo { + struct proc_fileinfo pfi; + struct psem_info pseminfo; +}; + + + +struct pshm_info { + struct stat pshm_stat; + uint64_t pshm_mappaddr; + char pshm_name[MAXPATHLEN]; +}; + +struct pshm_fdinfo { + struct 
proc_fileinfo pfi; + struct pshm_info pshminfo; +}; + + +struct pipe_info { + struct stat pipe_stat; + uint64_t pipe_handle; + uint64_t pipe_peerhandle; + int pipe_status; +}; + +struct pipe_fdinfo { + struct proc_fileinfo pfi; + struct pipe_info pipeinfo; +}; + + +struct kqueue_info { + struct stat kq_stat; + uint32_t kq_state; +}; +#define PROC_KQUEUE_SELECT 1 +#define PROC_KQUEUE_SLEEP 2 + +struct kqueue_fdinfo { + struct proc_fileinfo pfi; + struct kqueue_info kqueueinfo; +}; + +struct appletalk_info { + struct stat atalk_stat; +}; + +struct appletalk_fdinfo { + struct proc_fileinfo pfi; + struct appletalk_info appletalkinfo; +}; + + + +/* defns of process file desc type */ +#define PROX_FDTYPE_ATALK 0 +#define PROX_FDTYPE_VNODE 1 +#define PROX_FDTYPE_SOCKET 2 +#define PROX_FDTYPE_PSHM 3 +#define PROX_FDTYPE_PSEM 4 +#define PROX_FDTYPE_KQUEUE 5 +#define PROX_FDTYPE_PIPE 6 +#define PROX_FDTYPE_FSEVENTS 7 + +struct proc_fdinfo { + int32_t proc_fd; + uint32_t proc_fdtype; +}; + +/* Flavors for proc_pidinfo() */ +#define PROC_PIDLISTFDS 1 +#define PROC_PIDLISTFD_SIZE (sizeof(struct proc_fdinfo)) + +#define PROC_PIDTASKALLINFO 2 +#define PROC_PIDTASKALLINFO_SIZE (sizeof(struct proc_taskallinfo)) + +#define PROC_PIDTBSDINFO 3 +#define PROC_PIDTBSDINFO_SIZE (sizeof(struct proc_bsdinfo)) + +#define PROC_PIDTASKINFO 4 +#define PROC_PIDTASKINFO_SIZE (sizeof(struct proc_taskinfo)) + +#define PROC_PIDTHREADINFO 5 +#define PROC_PIDTHREADINFO_SIZE (sizeof(struct proc_threadinfo)) + +#define PROC_PIDLISTTHREADS 6 +#define PROC_PIDLISTTHREADS_SIZE (2* sizeof(uint32_t)) + + +#define PROC_PIDREGIONINFO 7 +#define PROC_PIDREGIONINFO_SIZE (sizeof(struct proc_regioninfo)) + +#define PROC_PIDREGIONPATHINFO 8 +#define PROC_PIDREGIONPATHINFO_SIZE (sizeof(struct proc_regionwithpathinfo)) + +#define PROC_PIDVNODEPATHINFO 9 +#define PROC_PIDVNODEPATHINFO_SIZE (sizeof(struct proc_vnodepathinfo)) + +/* Flavors for proc_pidfdinfo */ + +#define PROC_PIDFDVNODEINFO 1 +#define 
PROC_PIDFDVNODEINFO_SIZE (sizeof(struct vnode_fdinfo)) + +#define PROC_PIDFDVNODEPATHINFO 2 +#define PROC_PIDFDVNODEPATHINFO_SIZE (sizeof(struct vnode_fdinfowithpath)) + +#define PROC_PIDFDSOCKETINFO 3 +#define PROC_PIDFDSOCKETINFO_SIZE (sizeof(struct socket_fdinfo)) + +#define PROC_PIDFDPSEMINFO 4 +#define PROC_PIDFDPSEMINFO_SIZE (sizeof(struct psem_fdinfo)) + +#define PROC_PIDFDPSHMINFO 5 +#define PROC_PIDFDPSHMINFO_SIZE (sizeof(struct pshm_fdinfo)) + +#define PROC_PIDFDPIPEINFO 6 +#define PROC_PIDFDPIPEINFO_SIZE (sizeof(struct pipe_fdinfo)) + +#define PROC_PIDFDKQUEUEINFO 7 +#define PROC_PIDFDKQUEUEINFO_SIZE (sizeof(struct kqueue_fdinfo)) + +#define PROC_PIDFDATALKINFO 8 +#define PROC_PIDFDATALKINFO_SIZE (sizeof(struct appletalk_fdinfo)) + + + +#ifdef XNU_KERNEL_PRIVATE +extern int fill_socketinfo(socket_t so, struct socket_info *si); +extern int fill_pshminfo(struct pshmnode * pshm, struct pshm_info * pinfo); +extern int fill_pseminfo(struct psemnode * psem, struct psem_info * pinfo); +extern int fill_pipeinfo(struct pipe * cpipe, struct pipe_info * pinfo); +extern int fill_kqueueinfo(struct kqueue * kq, struct kqueue_info * kinfo); +#endif /* XNU_KERNEL_PRIVATE */ + + +__END_DECLS + +#endif /*_SYS_PROC_INFO_H */ diff --git a/bsd/sys/proc_internal.h b/bsd/sys/proc_internal.h index 4147cbe1f..e0e150146 100644 --- a/bsd/sys/proc_internal.h +++ b/bsd/sys/proc_internal.h @@ -182,6 +182,7 @@ struct proc { u_char p_usrpri; /* User-priority based on p_cpu and p_nice. */ char p_nice; /* Process "nice" value. */ char p_comm[MAXCOMLEN+1]; + char p_name[(2*MAXCOMLEN)+1]; struct pgrp *p_pgrp; /* Pointer to process group. 
*/ @@ -244,6 +245,7 @@ struct proc { #define P_LREFDRAIN 0x40 #define P_LREFDRAINWAIT 0x80 #define P_LREFDEAD 0x100 +#define P_LTHSIGSTACK 0x200 /* advisory flags in the proc */ #define P_LADVLOCK 0x01 @@ -258,10 +260,6 @@ struct proc { #ifdef KERNEL #include /* user_timeval, user_itimerval */ -#if __DARWIN_ALIGN_NATURAL -#pragma options align=natural -#endif - struct user_extern_proc { union { struct { @@ -278,7 +276,7 @@ struct user_extern_proc { pid_t p_oppid; /* Save parent pid during ptrace. XXX */ int p_dupfd; /* Sideways return value from fdopen. XXX */ /* Mach related */ - user_addr_t user_stack; /* where user stack was allocated */ + user_addr_t user_stack __attribute((aligned(8))); /* where user stack was allocated */ user_addr_t exit_thread; /* XXX Which thread is exiting? */ int p_debugger; /* allow to debug */ boolean_t sigwait; /* indication to suspend */ @@ -286,7 +284,7 @@ struct user_extern_proc { u_int p_estcpu; /* Time averaged value of p_cpticks. */ int p_cpticks; /* Ticks of cpu time. */ fixpt_t p_pctcpu; /* %cpu for this process during p_swtime */ - user_addr_t p_wchan; /* Sleep address. */ + user_addr_t p_wchan __attribute((aligned(8))); /* Sleep address. */ user_addr_t p_wmesg; /* Reason for sleep. */ u_int p_swtime; /* Time swapped in or out. */ u_int p_slptime; /* Time since last blocked. */ @@ -296,9 +294,9 @@ struct user_extern_proc { u_quad_t p_sticks; /* Statclock hits in system mode. */ u_quad_t p_iticks; /* Statclock hits processing intr. */ int p_traceflag; /* Kernel trace points. */ - user_addr_t p_tracep; /* Trace to vnode. */ + user_addr_t p_tracep __attribute((aligned(8))); /* Trace to vnode. */ int p_siglist; /* DEPRECATED */ - user_addr_t p_textvp; /* Vnode of executable. */ + user_addr_t p_textvp __attribute((aligned(8))); /* Vnode of executable. */ int p_holdcnt; /* If non-zero, don't swap. */ sigset_t p_sigmask; /* DEPRECATED. */ sigset_t p_sigignore; /* Signals being ignored. 
*/ @@ -307,16 +305,12 @@ struct user_extern_proc { u_char p_usrpri; /* User-priority based on p_cpu and p_nice. */ char p_nice; /* Process "nice" value. */ char p_comm[MAXCOMLEN+1]; - user_addr_t p_pgrp; /* Pointer to process group. */ + user_addr_t p_pgrp __attribute((aligned(8))); /* Pointer to process group. */ user_addr_t p_addr; /* Kernel virtual addr of u-area (PROC ONLY). */ u_short p_xstat; /* Exit status for wait; also stop signal. */ u_short p_acflag; /* Accounting flags. */ - user_addr_t p_ru; /* Exit information. XXX */ + user_addr_t p_ru __attribute((aligned(8))); /* Exit information. XXX */ }; - -#if __DARWIN_ALIGN_NATURAL -#pragma options align=reset -#endif #endif /* KERNEL */ /* @@ -324,6 +318,7 @@ struct user_extern_proc { * as it is used to represent "no process group". */ extern int nprocs, maxproc; /* Current and max number of procs. */ +extern int maxprocperuid; /* Current number of procs per uid */ __private_extern__ int hard_maxproc; /* hard limit */ #define PID_MAX 30000 diff --git a/bsd/sys/protosw.h b/bsd/sys/protosw.h index 693c71733..0609553ac 100644 --- a/bsd/sys/protosw.h +++ b/bsd/sys/protosw.h @@ -106,9 +106,7 @@ struct socket_filter; #include #endif /* KERNEL */ -#if __DARWIN_ALIGN_POWER -#pragma options align=power -#endif +#pragma pack(4) struct protosw { short pr_type; /* socket type used for */ @@ -156,9 +154,7 @@ struct protosw { #endif }; -#if __DARWIN_ALIGN_POWER -#pragma options align=reset -#endif +#pragma pack() /* * Values for pr_flags. 
diff --git a/bsd/sys/quota.h b/bsd/sys/quota.h index 691eecc88..b2b8ebc89 100644 --- a/bsd/sys/quota.h +++ b/bsd/sys/quota.h @@ -172,10 +172,6 @@ struct dqblk { * WARNING - keep in sync with struct dqblk */ -#if __DARWIN_ALIGN_NATURAL -#pragma options align=natural -#endif - struct user_dqblk { u_int64_t dqb_bhardlimit; /* absolute limit on disk bytes alloc */ u_int64_t dqb_bsoftlimit; /* preferred limit on disk bytes */ @@ -183,15 +179,11 @@ struct user_dqblk { u_int32_t dqb_ihardlimit; /* maximum # allocated inodes + 1 */ u_int32_t dqb_isoftlimit; /* preferred inode limit */ u_int32_t dqb_curinodes; /* current # allocated inodes */ - user_time_t dqb_btime; /* time limit for excessive disk use */ + user_time_t dqb_btime __attribute((aligned(8))); /* time limit for excessive disk use */ user_time_t dqb_itime; /* time limit for excessive files */ u_int32_t dqb_id; /* identifier (0 for empty entries) */ u_int32_t dqb_spare[4]; /* pad struct to power of 2 */ }; - -#if __DARWIN_ALIGN_NATURAL -#pragma options align=reset -#endif #endif /* KERNEL_PRIVATE */ #define INITQMAGICS { \ @@ -291,8 +283,8 @@ struct quotafile { struct dquot { LIST_ENTRY(dquot) dq_hash; /* hash list */ TAILQ_ENTRY(dquot) dq_freelist; /* free list */ + uint32_t dq_cnt; /* count of active references */ u_int16_t dq_flags; /* flags, see below */ - u_int16_t dq_cnt; /* count of active references */ u_int16_t dq_lflags; /* protected by the quota list lock */ u_int16_t dq_type; /* quota type of this dquot */ u_int32_t dq_id; /* identifier this applies to */ diff --git a/bsd/sys/reboot.h b/bsd/sys/reboot.h index 12eafdc11..4a2384d9e 100644 --- a/bsd/sys/reboot.h +++ b/bsd/sys/reboot.h @@ -80,6 +80,7 @@ #define RB_DFLTROOT 0x20 /* use compiled-in rootdev */ #define RB_ALTBOOT 0x40 /* use /boot.old vs /boot */ #define RB_UNIPROC 0x80 /* don't start slaves */ +#define RB_UPSDELAY 0x200 /* Delays restart by 5 minutes */ #define RB_PANIC 0 /* reboot due to panic */ #define RB_BOOT 1 /* reboot due to boot() */ 
diff --git a/bsd/sys/resource.h b/bsd/sys/resource.h index 823fcc738..45b65fa5d 100644 --- a/bsd/sys/resource.h +++ b/bsd/sys/resource.h @@ -173,13 +173,9 @@ struct rusage { * WARNING - keep in sync with struct timeval */ -#if __DARWIN_ALIGN_NATURAL -#pragma options align=natural -#endif - struct user_rusage_timeval { user_time_t tv_sec; /* seconds */ - __darwin_suseconds_t tv_usec; /* and microseconds */ + __darwin_suseconds_t tv_usec __attribute((aligned(8))); /* and microseconds */ }; struct user_rusage { struct user_rusage_timeval ru_utime; /* user time used */ @@ -200,10 +196,6 @@ struct user_rusage { user_long_t ru_nivcsw; /* involuntary " */ }; -#if __DARWIN_ALIGN_NATURAL -#pragma options align=reset -#endif - #endif // KERNEL diff --git a/bsd/sys/shm_internal.h b/bsd/sys/shm_internal.h index e0bd76189..a13cb9cd0 100644 --- a/bsd/sys/shm_internal.h +++ b/bsd/sys/shm_internal.h @@ -89,13 +89,15 @@ struct user_shmid_ds { * might be of interest to user programs. Also part of the ipcs interface. * Note: use of user_ssize_t intentional: permits 32 bit ipcs to provide * information about 64 bit programs shared segments. + * Note 2: user_ssize_t is not that useful if it's not 64-bit, + * so let's use int64_t explicitly... 
*/ struct shminfo { - user_ssize_t shmmax; /* max shm segment size (bytes) */ - user_ssize_t shmmin; /* min shm segment size (bytes) */ - user_ssize_t shmmni; /* max number of shm identifiers */ - user_ssize_t shmseg; /* max shm segments per process */ - user_ssize_t shmall; /* max amount of shm (pages) */ + int64_t shmmax; /* max shm segment size (bytes) */ + int64_t shmmin; /* min shm segment size (bytes) */ + int64_t shmmni; /* max number of shm identifiers */ + int64_t shmseg; /* max shm segments per process */ + int64_t shmall; /* max amount of shm (pages) */ }; #ifdef KERNEL diff --git a/bsd/sys/signal.h b/bsd/sys/signal.h index 0fa8fb1cb..27914af93 100644 --- a/bsd/sys/signal.h +++ b/bsd/sys/signal.h @@ -204,10 +204,6 @@ struct sigevent { // LP64todo - should this move? #ifdef BSD_KERNEL_PRIVATE -#if __DARWIN_ALIGN_NATURAL -#pragma options align=natural -#endif - union user_sigval { struct { int pad; /* assumes Motorolla byte order */ @@ -224,10 +220,6 @@ struct user_sigevent { user_addr_t sigev_notify_attributes; /* Notify attributes */ }; -#if __DARWIN_ALIGN_NATURAL -#pragma options align=reset -#endif - #endif /* BSD_KERNEL_PRIVATE */ typedef struct __siginfo { @@ -245,10 +237,6 @@ typedef struct __siginfo { #ifdef BSD_KERNEL_PRIVATE -#if __DARWIN_ALIGN_NATURAL -#pragma options align=natural -#endif - typedef struct __user_siginfo { int si_signo; /* signal number */ int si_errno; /* errno association */ @@ -262,10 +250,6 @@ typedef struct __user_siginfo { user_ulong_t pad[7]; /* Reserved for Future Use */ } user_siginfo_t; -#if __DARWIN_ALIGN_NATURAL -#pragma options align=reset -#endif - #endif /* BSD_KERNEL_PRIVATE */ /* @@ -371,10 +355,6 @@ struct sigaction { #ifdef BSD_KERNEL_PRIVATE #include -#if __DARWIN_ALIGN_NATURAL -#pragma options align=natural -#endif - union __user_sigaction_u { user_addr_t __sa_handler; user_addr_t __sa_sigaction; @@ -393,10 +373,6 @@ struct __user_sigaction { int sa_flags; /* see signal options below */ }; -#if 
__DARWIN_ALIGN_NATURAL -#pragma options align=reset -#endif - #undef SIG_DFL #undef SIG_IGN #undef SIG_ERR @@ -448,20 +424,12 @@ typedef void (*sig_t)(int); /* type of signal function */ */ #ifdef BSD_KERNEL_PRIVATE -#if __DARWIN_ALIGN_NATURAL -#pragma options align=natural -#endif - struct user_sigaltstack { user_addr_t ss_sp; /* signal stack base */ user_size_t ss_size; /* signal stack length */ - int ss_flags; /* SA_DISABLE and/or SA_ONSTACK */ + int ss_flags __attribute((aligned(8))); /* SA_DISABLE and/or SA_ONSTACK */ }; -#if __DARWIN_ALIGN_NATURAL -#pragma options align=reset -#endif - #endif /* BSD_KERNEL_PRIVATE */ #ifndef _STACK_T diff --git a/bsd/sys/socket.h b/bsd/sys/socket.h index 65567cc12..36392eaab 100644 --- a/bsd/sys/socket.h +++ b/bsd/sys/socket.h @@ -485,24 +485,17 @@ struct msghdr { * grow when we're dealing with a 64-bit process. * WARNING - keep in sync with struct msghdr */ -#if __DARWIN_ALIGN_NATURAL -#pragma options align=natural -#endif struct user_msghdr { user_addr_t msg_name; /* optional address */ socklen_t msg_namelen; /* size of address */ - user_addr_t msg_iov; /* scatter/gather array */ + user_addr_t msg_iov __attribute((aligned(8))); /* scatter/gather array */ int msg_iovlen; /* # elements in msg_iov */ - user_addr_t msg_control; /* ancillary data, see below */ + user_addr_t msg_control __attribute((aligned(8))); /* ancillary data, see below */ socklen_t msg_controllen; /* ancillary data buffer len */ int msg_flags; /* flags on received message */ }; -#if __DARWIN_ALIGN_NATURAL -#pragma options align=reset -#endif - #endif // KERNEL #define MSG_OOB 0x1 /* process out-of-band data */ diff --git a/bsd/sys/socketvar.h b/bsd/sys/socketvar.h index a8db652c3..e71e3b8b7 100644 --- a/bsd/sys/socketvar.h +++ b/bsd/sys/socketvar.h @@ -204,6 +204,14 @@ struct socket { int so_usecount; /* refcounting of socket use */; int so_retaincnt; u_int32_t so_filteruse; /* usecount for the socket filters */ +/* for debug purposes */ +#define 
SO_LCKDBG_MAX 4 /* number of debug locking Link Registers recorded */ + u_int32_t lock_lr[SO_LCKDBG_MAX]; /* socket locking calling history */ + int next_lock_lr; + u_int32_t unlock_lr[SO_LCKDBG_MAX]; /* socket unlocking caller history */ + int next_unlock_lr; + + void *reserved3; /* Temporarily in use/debug: last socket lock LR */ void *reserved4; /* Temporarily in use/debug: last socket unlock LR */ thread_t so_send_filt_thread; @@ -369,7 +377,6 @@ int sbappendcontrol(struct sockbuf *sb, struct mbuf *m0, struct mbuf *control, int *error_out); int sbappendrecord(struct sockbuf *sb, struct mbuf *m0); void sbcheck(struct sockbuf *sb); -int sbcompress(struct sockbuf *sb, struct mbuf *m, struct mbuf *n); struct mbuf * sbcreatecontrol(caddr_t p, int size, int type, int level); void sbdrop(struct sockbuf *sb, int len); diff --git a/bsd/sys/stat.h b/bsd/sys/stat.h index 3acb7c4a9..2ac214111 100644 --- a/bsd/sys/stat.h +++ b/bsd/sys/stat.h @@ -122,6 +122,7 @@ typedef __darwin_off_t off_t; typedef __darwin_time_t time_t; #endif + /* [XSI] The timespec structure may be defined as described in */ #ifndef _TIMESPEC #define _TIMESPEC @@ -129,6 +130,7 @@ struct timespec { time_t tv_sec; /* seconds */ long tv_nsec; /* and nanoseconds */ }; + // LP64todo - should this move? #ifdef KERNEL /* LP64 version of struct timespec. time_t is a long and must grow when @@ -137,7 +139,7 @@ struct timespec { */ struct user_timespec { user_time_t tv_sec; /* seconds */ - __int64_t tv_nsec; /* and nanoseconds */ + int32_t tv_nsec __attribute((aligned(8))); /* and nanoseconds */ }; #endif // KERNEL #endif /* _TIMESPEC */ @@ -212,9 +214,6 @@ struct stat { * grow when we're dealing with a 64-bit process. * WARNING - keep in sync with struct stat */ -#if __DARWIN_ALIGN_NATURAL -#pragma options align=natural -#endif struct user_stat { dev_t st_dev; /* [XSI] ID of device containing file */ @@ -245,10 +244,6 @@ struct user_stat { __int64_t st_qspare[2]; /* RESERVED: DO NOT USE! 
*/ }; -#if __DARWIN_ALIGN_NATURAL -#pragma options align=reset -#endif - extern void munge_stat(struct stat *sbp, struct user_stat *usbp); #endif // KERNEL diff --git a/bsd/sys/sys_domain.h b/bsd/sys/sys_domain.h index 788b71d96..166637902 100644 --- a/bsd/sys/sys_domain.h +++ b/bsd/sys/sys_domain.h @@ -27,6 +27,13 @@ #include #include +#include + +#ifdef KERNEL_PRIVATE +#include +#include +#include +#endif /* KERNEL_PRIVATE */ /* Kernel Events Protocol */ #define SYSPROTO_EVENT 1 /* kernel events protocol */ @@ -38,16 +45,56 @@ /* System family socket address */ struct sockaddr_sys { - u_char ss_len; /* sizeof(struct sockaddr_sys) */ - u_char ss_family; /* AF_SYSTEM */ - u_int16_t ss_sysaddr; /* protocol address in AF_SYSTEM */ - u_int32_t ss_reserved[7]; /* reserved to the protocol use */ + u_char ss_len; /* sizeof(struct sockaddr_sys) */ + u_char ss_family; /* AF_SYSTEM */ + u_int16_t ss_sysaddr; /* protocol address in AF_SYSTEM */ + u_int32_t ss_reserved[7]; /* reserved to the protocol use */ }; -#ifdef KERNEL #ifdef KERNEL_PRIVATE +/* + * internal structure maintained for each register controller + */ +struct ctl_cb; +struct socket; + +struct kctl +{ + TAILQ_ENTRY(kctl) next; /* controller chain */ + + /* controller information provided when registering */ + char name[MAX_KCTL_NAME]; /* unique nke identifier, provided by DTS */ + u_int32_t id; + u_int32_t reg_unit; + + /* misc communication information */ + u_int32_t flags; /* support flags */ + u_int32_t recvbufsize; /* request more than the default buffer size */ + u_int32_t sendbufsize; /* request more than the default buffer size */ + + /* Dispatch functions */ + ctl_connect_func connect; /* Make contact */ + ctl_disconnect_func disconnect; /* Break contact */ + ctl_send_func send; /* Send data to nke */ + ctl_setopt_func setopt; /* set kctl configuration */ + ctl_getopt_func getopt; /* get kctl configuration */ + + TAILQ_HEAD(, ctl_cb) kcb_head; + u_int32_t lastunit; +}; + +struct ctl_cb { + 
TAILQ_ENTRY(ctl_cb) next; /* controller chain */ + lck_mtx_t *mtx; + struct socket *so; /* controlling socket */ + struct kctl *kctl; /* back pointer to controller */ + u_int32_t unit; + void *userdata; +}; + + extern struct domain systemdomain; /* built in system domain protocols init function */ @@ -57,7 +104,6 @@ int kern_control_init(void); __END_DECLS #endif /* KERNEL_PRIVATE */ -#endif /* KERNEL */ #endif /* _SYSTEM_DOMAIN_H_ */ diff --git a/bsd/sys/syscall.h b/bsd/sys/syscall.h index 3df5679d3..5d20a40f4 100644 --- a/bsd/sys/syscall.h +++ b/bsd/sys/syscall.h @@ -138,11 +138,7 @@ #define SYS_getpriority 100 /* 101 old send */ /* 102 old recv */ -#ifdef __ppc__ /* 103 old sigreturn */ -#else -#define SYS_sigreturn 103 -#endif #define SYS_bind 104 #define SYS_setsockopt 105 #define SYS_listen 106 @@ -155,12 +151,7 @@ /* 113 old recvmsg */ /* 114 old sendmsg */ /* 115 old vtrace */ -#ifdef __ppc__ -#define SYS_ppc_gettimeofday 116 -#define SYS_gettimeofday 116 -#else #define SYS_gettimeofday 116 -#endif #define SYS_getrusage 117 #define SYS_getsockopt 118 /* 119 old resuba */ @@ -236,12 +227,8 @@ #define SYS_setgid 181 #define SYS_setegid 182 #define SYS_seteuid 183 -#ifdef __ppc__ #define SYS_sigreturn 184 -#else - /* 184 */ -#endif - /* 185 */ +#define SYS_chud 185 /* 186 */ /* 187 */ #define SYS_stat 188 @@ -266,16 +253,6 @@ #define SYS_mlock 203 #define SYS_munlock 204 #define SYS_undelete 205 -#ifdef __ppc__ -#define SYS_ATsocket 206 -#define SYS_ATgetmsg 207 -#define SYS_ATputmsg 208 -#define SYS_ATPsndreq 209 -#define SYS_ATPsndrsp 210 -#define SYS_ATPgetreq 211 -#define SYS_ATPgetrsp 212 - /* 213 Reserved for AppleTalk */ -#else #define SYS_ATsocket 206 #define SYS_ATgetmsg 207 #define SYS_ATputmsg 208 @@ -284,7 +261,6 @@ #define SYS_ATPgetreq 211 #define SYS_ATPgetrsp 212 /* 213 Reserved for AppleTalk */ -#endif /* __ppc__ */ #define SYS_kqueue_from_portset_np 214 #define SYS_kqueue_portset_np 215 #define SYS_mkcomplex 216 @@ -416,7 +392,7 @@ #define 
SYS___pthread_canceled 333 #define SYS___semwait_signal 334 #define SYS_utrace 335 - /* 336 */ +#define SYS_proc_info 336 /* 337 */ /* 338 */ /* 339 */ @@ -445,7 +421,7 @@ #define SYS_kqueue 362 #define SYS_kevent 363 #define SYS_lchown 364 - /* 365 */ +#define SYS_stack_snapshot 365 /* 366 */ /* 367 */ /* 368 */ diff --git a/bsd/sys/sysctl.h b/bsd/sys/sysctl.h index e6fb7b1b6..57b88b8db 100644 --- a/bsd/sys/sysctl.h +++ b/bsd/sys/sysctl.h @@ -350,8 +350,10 @@ SYSCTL_DECL(_user); #define KERN_PANICINFO 41 /* node: panic UI information */ #define KERN_SYSV 42 /* node: System V IPC information */ #define KERN_AFFINITY 43 /* xxx */ -#define KERN_CLASSIC 44 /* xxx */ -#define KERN_CLASSICHANDLER 45 /* xxx */ +#define KERN_TRANSLATE 44 /* xxx */ +#define KERN_CLASSIC KERN_TRANSLATE /* XXX backwards compat */ +#define KERN_EXEC 45 /* xxx */ +#define KERN_CLASSICHANDLER KERN_EXEC /* XXX backwards compatibility */ #define KERN_AIOMAX 46 /* int: max aio requests */ #define KERN_AIOPROCMAX 47 /* int: max aio requests per process */ #define KERN_AIOTHREADS 48 /* int: max aio worker threads */ @@ -367,8 +369,12 @@ SYSCTL_DECL(_user); #define KERN_LOW_PRI_WINDOW 56 /* int: set/reset throttle window - milliseconds */ #define KERN_LOW_PRI_DELAY 57 /* int: set/reset throttle delay - milliseconds */ #define KERN_POSIX 58 /* node: posix tunables */ -#define KERN_USRSTACK64 59 /* LP64 user stack query */ -#define KERN_MAXID 60 /* number of valid kern ids */ +#define KERN_USRSTACK64 59 /* LP64 user stack query */ +#define KERN_NX_PROTECTION 60 /* int: whether no-execute protection is enabled */ +#define KERN_TFP 61 /* Task for pid settings */ +#define KERN_PROCNAME 62 /* setup process program name(2*MAXCOMLEN) */ +#define KERN_THALTSTACK 63 /* setup process to have per thread sigaltstack */ +#define KERN_MAXID 64 /* number of valid kern ids */ #if defined(__LP64__) #define KERN_USRSTACK KERN_USRSTACK64 @@ -376,6 +382,16 @@ SYSCTL_DECL(_user); #define KERN_USRSTACK KERN_USRSTACK32 
#endif +/* KERN_TFP types */ +#define KERN_TFP_POLICY 1 +#define KERN_TFP_READ_GROUP 2 +#define KERN_TFP_RW_GROUP 3 + +/* KERN_TFP_POLICY values. All policies allow task port for self */ +#define KERN_TFP_POLICY_DENY 0 /* Deny Mode: None allowed except privileged */ +#define KERN_TFP_POLICY_PERMISSIVE 1 /* Permissive Mode: related ones allowed or privileged */ +#define KERN_TFP_POLICY_RESTRICTED 2 /* Restricted Mode: privileged or setgid and related */ + /* KERN_KDEBUG types */ #define KERN_KDEFLAGS 1 #define KERN_KDDFLAGS 2 @@ -469,7 +485,7 @@ SYSCTL_DECL(_user); { "sysv", CTLTYPE_NODE }, \ { "dummy", CTLTYPE_INT }, \ { "dummy", CTLTYPE_INT }, \ - { "dummy", CTLTYPE_INT }, \ + { "exec", CTLTYPE_NODE }, \ { "aiomax", CTLTYPE_INT }, \ { "aioprocmax", CTLTYPE_INT }, \ { "aiothreads", CTLTYPE_INT }, \ @@ -482,7 +498,12 @@ SYSCTL_DECL(_user); { "proc_low_pri_io", CTLTYPE_INT }, \ { "low_pri_window", CTLTYPE_INT }, \ { "low_pri_delay", CTLTYPE_INT }, \ - { "posix", CTLTYPE_NODE } \ + { "posix", CTLTYPE_NODE }, \ + { "usrstack64", CTLTYPE_QUAD }, \ + { "nx", CTLTYPE_INT }, \ + { "tfp", CTLTYPE_NODE }, \ + { "procname", CTLTYPE_STRING }, \ + { "threadsigaltstack", CTLTYPE_INT } \ } /* @@ -564,10 +585,6 @@ struct kinfo_proc { * WARNING - keep in sync with _pcred */ -#if __DARWIN_ALIGN_NATURAL -#pragma options align=natural -#endif - struct user_pcred { char pc_lock[72]; /* opaque content */ user_addr_t pc_ucred; /* Current credentials. */ @@ -575,7 +592,7 @@ struct user_pcred { uid_t p_svuid; /* Saved effective user id. */ gid_t p_rgid; /* Real group id. */ gid_t p_svgid; /* Saved effective group id. */ - int p_refcnt; /* Number of references. */ + int p_refcnt __attribute((aligned(8))); /* Number of references. */ }; /* LP64 version of kinfo_proc. 
all pointers @@ -595,7 +612,7 @@ struct user_kinfo_proc { short e_jobc; /* job control counter */ dev_t e_tdev; /* controlling tty dev */ pid_t e_tpgid; /* tty process group id */ - user_addr_t e_tsess; /* tty session pointer */ + user_addr_t e_tsess __attribute((aligned(8))); /* tty session pointer */ char e_wmesg[WMESGLEN+1]; /* wchan message */ segsz_t e_xsize; /* text size */ short e_xrssize; /* text rss */ @@ -607,10 +624,6 @@ struct user_kinfo_proc { } kp_eproc; }; -#if __DARWIN_ALIGN_NATURAL -#pragma options align=reset -#endif - #endif /* BSD_KERNEL_PRIVATE */ #endif /* __APPLE_API_UNSTABLE */ @@ -671,18 +684,11 @@ extern struct loadavg averunnable; // LP64todo - should this move? #ifdef BSD_KERNEL_PRIVATE -#if __DARWIN_ALIGN_NATURAL -#pragma options align=natural -#endif struct user_loadavg { fixpt_t ldavg[3]; - user_long_t fscale; + user_long_t fscale __attribute((aligned(8))); }; -#if __DARWIN_ALIGN_NATURAL -#pragma options align=reset -#endif - #endif /* BSD_KERNEL_PRIVATE */ #endif /* __APPLE_API_PRIVATE */ @@ -799,9 +805,11 @@ struct user_loadavg { * hw.l3cachesize - * * - * These are the selectors for optional processor features. Selectors that return errors are not support on the system. - * Supported features will return 1 if they are recommended or 0 if they are supported but are not expected to help performance. - * Future versions of these selectors may return larger values as necessary so it is best to test for non zero. + * These are the selectors for optional processor features for specific processors. Selectors that return errors are not supported + * on the system. Supported features will return 1 if they are recommended or 0 if they are supported but are not expected to help + * performance. Future versions of these selectors may return larger values as necessary so it is best to test for non zero. 
+ * + * For PowerPC: * * hw.optional.floatingpoint - Floating Point Instructions * hw.optional.altivec - AltiVec Instructions @@ -813,6 +821,14 @@ struct user_loadavg { * hw.optional.datastreams - Data Streams Instructions * hw.optional.dcbtstreams - Data Cache Block Touch Steams Instruction Form * + * For x86 Architecture: + * + * hw.optional.floatingpoint - Floating Point Instructions + * hw.optional.mmx - Original MMX vector instructions + * hw.optional.sse - Streaming SIMD Extensions + * hw.optional.sse2 - Streaming SIMD Extensions 2 + * hw.optional.sse3 - Streaming SIMD Extensions 3 + * hw.optional.x86_64 - 64-bit support */ diff --git a/bsd/sys/syslimits.h b/bsd/sys/syslimits.h index 187fd0791..703c82433 100644 --- a/bsd/sys/syslimits.h +++ b/bsd/sys/syslimits.h @@ -65,7 +65,7 @@ * compile time; you *cannot* set it higher than the hard limit!! */ #define ARG_MAX (256 * 1024) /* max bytes for an exec function */ -#define CHILD_MAX 100 /* max simultaneous processes */ +#define CHILD_MAX 266 /* max simultaneous processes */ #define GID_MAX 2147483647U /* max value for a gid_t (2^31-2) */ #define LINK_MAX 32767 /* max file link count */ #define MAX_CANON 255 /* max bytes in term canon input line */ diff --git a/bsd/sys/sysproto.h b/bsd/sys/sysproto.h index 6ffe47f1e..c42fb5d72 100644 --- a/bsd/sys/sysproto.h +++ b/bsd/sys/sysproto.h @@ -40,13 +40,8 @@ #ifdef KERNEL #ifdef __APPLE_API_PRIVATE -#ifdef __ppc__ #define PAD_(t) (sizeof(uint64_t) <= sizeof(t) \ ? 0 : sizeof(uint64_t) - sizeof(t)) -#else -#define PAD_(t) (sizeof(register_t) <= sizeof(t) \ - ? 
0 : sizeof(register_t) - sizeof(t)) -#endif #if BYTE_ORDER == LITTLE_ENDIAN #define PADL_(t) 0 #define PADR_(t) PAD_(t) @@ -58,7 +53,6 @@ __BEGIN_DECLS #ifndef __MUNGE_ONCE #define __MUNGE_ONCE -#ifdef __ppc__ void munge_w(const void *, void *); void munge_ww(const void *, void *); void munge_www(const void *, void *); @@ -67,31 +61,25 @@ void munge_wwwww(const void *, void *); void munge_wwwwww(const void *, void *); void munge_wwwwwww(const void *, void *); void munge_wwwwwwww(const void *, void *); -void munge_d(const void *, void *); -void munge_dd(const void *, void *); -void munge_ddd(const void *, void *); -void munge_dddd(const void *, void *); -void munge_ddddd(const void *, void *); -void munge_dddddd(const void *, void *); -void munge_ddddddd(const void *, void *); -void munge_dddddddd(const void *, void *); void munge_wl(const void *, void *); void munge_wlw(const void *, void *); void munge_wwwl(const void *, void *); +void munge_wwwlww(const void *, void *); void munge_wwwwl(const void *, void *); void munge_wwwwwl(const void *, void *); void munge_wsw(const void *, void *); void munge_wws(const void *, void *); void munge_wwwsw(const void *, void *); +#ifdef __ppc__ +void munge_d(const void *, void *); +void munge_dd(const void *, void *); +void munge_ddd(const void *, void *); +void munge_dddd(const void *, void *); +void munge_ddddd(const void *, void *); +void munge_dddddd(const void *, void *); +void munge_ddddddd(const void *, void *); +void munge_dddddddd(const void *, void *); #else -#define munge_w NULL -#define munge_ww NULL -#define munge_www NULL -#define munge_wwww NULL -#define munge_wwwww NULL -#define munge_wwwwww NULL -#define munge_wwwwwww NULL -#define munge_wwwwwwww NULL #define munge_d NULL #define munge_dd NULL #define munge_ddd NULL @@ -100,14 +88,6 @@ void munge_wwwsw(const void *, void *); #define munge_dddddd NULL #define munge_ddddddd NULL #define munge_dddddddd NULL -#define munge_wl NULL -#define munge_wlw NULL -#define 
munge_wwwl NULL -#define munge_wwwwl NULL -#define munge_wwwwwl NULL -#define munge_wsw NULL -#define munge_wws NULL -#define munge_wwwsw NULL #endif // __ppc__ #endif /* !__MUNGE_ONCE */ @@ -447,12 +427,6 @@ struct getpriority_args { char which_l_[PADL_(int)]; int which; char which_r_[PADR_(int)]; char who_l_[PADL_(int)]; int who; char who_r_[PADR_(int)]; }; -#ifdef __ppc__ -#else -struct sigreturn_args { - char sigcntxp_l_[PADL_(struct sigcontext *)]; struct sigcontext * sigcntxp; char sigcntxp_r_[PADR_(struct sigcontext *)]; -}; -#endif struct bind_args { char s_l_[PADL_(int)]; int s; char s_r_[PADR_(int)]; char name_l_[PADL_(user_addr_t)]; user_addr_t name; char name_r_[PADR_(user_addr_t)]; @@ -472,17 +446,10 @@ struct listen_args { struct sigsuspend_args { char mask_l_[PADL_(sigset_t)]; sigset_t mask; char mask_r_[PADR_(sigset_t)]; }; -#ifdef __ppc__ -struct ppc_gettimeofday_args { - char tp_l_[PADL_(user_addr_t)]; user_addr_t tp; char tp_r_[PADR_(user_addr_t)]; - char tzp_l_[PADL_(user_addr_t)]; user_addr_t tzp; char tzp_r_[PADR_(user_addr_t)]; -}; -#else struct gettimeofday_args { char tp_l_[PADL_(user_addr_t)]; user_addr_t tp; char tp_r_[PADR_(user_addr_t)]; char tzp_l_[PADL_(user_addr_t)]; user_addr_t tzp; char tzp_r_[PADR_(user_addr_t)]; }; -#endif struct getrusage_args { char who_l_[PADL_(int)]; int who; char who_r_[PADR_(int)]; char rusage_l_[PADL_(user_addr_t)]; user_addr_t rusage; char rusage_r_[PADR_(user_addr_t)]; @@ -654,13 +621,18 @@ struct setegid_args { struct seteuid_args { char euid_l_[PADL_(uid_t)]; uid_t euid; char euid_r_[PADR_(uid_t)]; }; -#ifdef __ppc__ struct sigreturn_args { char uctx_l_[PADL_(user_addr_t)]; user_addr_t uctx; char uctx_r_[PADR_(user_addr_t)]; char infostyle_l_[PADL_(int)]; int infostyle; char infostyle_r_[PADR_(int)]; }; -#else -#endif +struct chud_args { + char code_l_[PADL_(int)]; int code; char code_r_[PADR_(int)]; + char arg1_l_[PADL_(int)]; int arg1; char arg1_r_[PADR_(int)]; + char arg2_l_[PADL_(int)]; int arg2; 
char arg2_r_[PADR_(int)]; + char arg3_l_[PADL_(int)]; int arg3; char arg3_r_[PADR_(int)]; + char arg4_l_[PADL_(int)]; int arg4; char arg4_r_[PADR_(int)]; + char arg5_l_[PADL_(int)]; int arg5; char arg5_r_[PADR_(int)]; +}; struct stat_args { char path_l_[PADL_(user_addr_t)]; user_addr_t path; char path_r_[PADR_(user_addr_t)]; char ub_l_[PADL_(user_addr_t)]; user_addr_t ub; char ub_r_[PADR_(user_addr_t)]; @@ -743,7 +715,6 @@ struct munlock_args { struct undelete_args { char path_l_[PADL_(user_addr_t)]; user_addr_t path; char path_r_[PADR_(user_addr_t)]; }; -#ifdef __ppc__ struct ATsocket_args { char proto_l_[PADL_(int)]; int proto; char proto_r_[PADR_(int)]; }; @@ -780,8 +751,6 @@ struct ATPgetrsp_args { char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)]; char bdsp_l_[PADL_(unsigned char *)]; unsigned char * bdsp; char bdsp_r_[PADR_(unsigned char *)]; }; -#else -#endif /* __ppc__ */ struct kqueue_from_portset_np_args { char portset_l_[PADL_(int)]; int portset; char portset_r_[PADR_(int)]; }; @@ -1260,6 +1229,14 @@ struct utrace_args { char addr_l_[PADL_(user_addr_t)]; user_addr_t addr; char addr_r_[PADR_(user_addr_t)]; char len_l_[PADL_(user_size_t)]; user_size_t len; char len_r_[PADR_(user_size_t)]; }; +struct proc_info_args { + char callnum_l_[PADL_(int32_t)]; int32_t callnum; char callnum_r_[PADR_(int32_t)]; + char pid_l_[PADL_(int32_t)]; int32_t pid; char pid_r_[PADR_(int32_t)]; + char flavor_l_[PADL_(uint32_t)]; uint32_t flavor; char flavor_r_[PADR_(uint32_t)]; + char arg_l_[PADL_(uint64_t)]; uint64_t arg; char arg_r_[PADR_(uint64_t)]; + char buffer_l_[PADL_(user_addr_t)]; user_addr_t buffer; char buffer_r_[PADR_(user_addr_t)]; + char buffersize_l_[PADL_(int32_t)]; int32_t buffersize; char buffersize_r_[PADR_(int32_t)]; +}; struct audit_args { char record_l_[PADL_(user_addr_t)]; user_addr_t record; char record_r_[PADR_(user_addr_t)]; char length_l_[PADL_(int)]; int length; char length_r_[PADR_(int)]; @@ -1308,6 +1285,12 @@ struct lchown_args { char 
owner_l_[PADL_(uid_t)]; uid_t owner; char owner_r_[PADR_(uid_t)]; char group_l_[PADL_(gid_t)]; gid_t group; char group_r_[PADR_(gid_t)]; }; +struct stack_snapshot_args { + char pid_l_[PADL_(pid_t)]; pid_t pid; char pid_r_[PADR_(pid_t)]; + char tracebuf_l_[PADL_(user_addr_t)]; user_addr_t tracebuf; char tracebuf_r_[PADR_(user_addr_t)]; + char tracebuf_size_l_[PADL_(uint32_t)]; uint32_t tracebuf_size; char tracebuf_size_r_[PADR_(uint32_t)]; + char options_l_[PADL_(uint32_t)]; uint32_t options; char options_r_[PADR_(uint32_t)]; +}; int nosys(struct proc *, struct nosys_args *, int *); void exit(struct proc *, struct exit_args *, int *); int fork(struct proc *, struct fork_args *, int *); @@ -1392,19 +1375,11 @@ int setpriority(struct proc *, struct setpriority_args *, int *); int socket(struct proc *, struct socket_args *, int *); int connect(struct proc *, struct connect_args *, int *); int getpriority(struct proc *, struct getpriority_args *, int *); -#ifdef __ppc__ -#else -int sigreturn(struct proc *, struct sigreturn_args *, int *); -#endif int bind(struct proc *, struct bind_args *, int *); int setsockopt(struct proc *, struct setsockopt_args *, int *); int listen(struct proc *, struct listen_args *, int *); int sigsuspend(struct proc *, struct sigsuspend_args *, int *); -#ifdef __ppc__ -int ppc_gettimeofday(struct proc *, struct ppc_gettimeofday_args *, int *); -#else int gettimeofday(struct proc *, struct gettimeofday_args *, int *); -#endif int getrusage(struct proc *, struct getrusage_args *, int *); int getsockopt(struct proc *, struct getsockopt_args *, int *); int readv(struct proc *, struct readv_args *, user_ssize_t *); @@ -1447,10 +1422,8 @@ int kdebug_trace(struct proc *, struct kdebug_trace_args *, int *); int setgid(struct proc *, struct setgid_args *, int *); int setegid(struct proc *, struct setegid_args *, int *); int seteuid(struct proc *, struct seteuid_args *, int *); -#ifdef __ppc__ int sigreturn(struct proc *, struct sigreturn_args *, int *); 
-#else -#endif +int chud(struct proc *, struct chud_args *, int *); int stat(struct proc *, struct stat_args *, int *); int fstat(struct proc *, struct fstat_args *, int *); int lstat(struct proc *, struct lstat_args *, int *); @@ -1471,7 +1444,6 @@ int __sysctl(struct proc *, struct __sysctl_args *, int *); int mlock(struct proc *, struct mlock_args *, int *); int munlock(struct proc *, struct munlock_args *, int *); int undelete(struct proc *, struct undelete_args *, int *); -#ifdef __ppc__ int ATsocket(struct proc *, struct ATsocket_args *, int *); int ATgetmsg(struct proc *, struct ATgetmsg_args *, int *); int ATputmsg(struct proc *, struct ATputmsg_args *, int *); @@ -1479,8 +1451,6 @@ int ATPsndreq(struct proc *, struct ATPsndreq_args *, int *); int ATPsndrsp(struct proc *, struct ATPsndrsp_args *, int *); int ATPgetreq(struct proc *, struct ATPgetreq_args *, int *); int ATPgetrsp(struct proc *, struct ATPgetrsp_args *, int *); -#else -#endif /* __ppc__ */ int kqueue_from_portset_np(struct proc *, struct kqueue_from_portset_np_args *, int *); int kqueue_portset_np(struct proc *, struct kqueue_portset_np_args *, int *); int getattrlist(struct proc *, struct getattrlist_args *, int *); @@ -1582,6 +1552,7 @@ int __pthread_markcancel(struct proc *, struct __pthread_markcancel_args *, int int __pthread_canceled(struct proc *, struct __pthread_canceled_args *, int *); int __semwait_signal(struct proc *, struct __semwait_signal_args *, int *); int utrace(struct proc *, struct utrace_args *, int *); +int proc_info(struct proc *, struct proc_info_args *, int *); int audit(struct proc *, struct audit_args *, int *); int auditon(struct proc *, struct auditon_args *, int *); int getauid(struct proc *, struct getauid_args *, int *); @@ -1594,6 +1565,7 @@ int auditctl(struct proc *, struct auditctl_args *, int *); int kqueue(struct proc *, struct kqueue_args *, int *); int kevent(struct proc *, struct kevent_args *, int *); int lchown(struct proc *, struct lchown_args *, 
int *); +int stack_snapshot(struct proc *, struct stack_snapshot_args *, int *); __END_DECLS #undef PAD_ diff --git a/bsd/sys/termios.h b/bsd/sys/termios.h index 47ea1c9ec..d6d13dae3 100644 --- a/bsd/sys/termios.h +++ b/bsd/sys/termios.h @@ -274,24 +274,16 @@ typedef unsigned long long user_speed_t; * WARNING - keep in sync with struct termios */ -#if __DARWIN_ALIGN_NATURAL -#pragma options align=natural -#endif - struct user_termios { user_tcflag_t c_iflag; /* input flags */ user_tcflag_t c_oflag; /* output flags */ user_tcflag_t c_cflag; /* control flags */ user_tcflag_t c_lflag; /* local flags */ cc_t c_cc[NCCS]; /* control chars */ - user_speed_t c_ispeed; /* input speed */ + user_speed_t c_ispeed __attribute((aligned(8))); /* input speed */ user_speed_t c_ospeed; /* output speed */ }; -#if __DARWIN_ALIGN_NATURAL -#pragma options align=reset -#endif - #endif /* KERNEL */ /* diff --git a/bsd/sys/time.h b/bsd/sys/time.h index a7791b3c3..2801abed4 100644 --- a/bsd/sys/time.h +++ b/bsd/sys/time.h @@ -165,17 +165,33 @@ struct timespec { // LP64todo - should this move? #include /* user_time_t */ -/* LP64 version of struct timeval. time_t is a long and must grow when +/* LP64 version of struct timespec. time_t is a long and must grow when * we're dealing with a 64-bit process. - * WARNING - keep in sync with struct timeval + * WARNING - keep in sync with struct timespec */ -#if __DARWIN_ALIGN_NATURAL -#pragma options align=natural +struct user_timespec { + user_time_t tv_sec; /* seconds */ + int32_t tv_nsec __attribute((aligned(8))); /* and nanoseconds */ +}; + +#endif #endif + +#ifdef KERNEL +#ifndef _USERTIMEVAL +#define _USERTIMEVAL + +#include /* user_time_t */ +/* + * LP64 version of struct timeval. time_t is a long and must grow when + * we're dealing with a 64-bit process. 
+ * WARNING - keep in sync with struct timeval + */ + struct user_timeval { user_time_t tv_sec; /* seconds */ - suseconds_t tv_usec; /* and microseconds */ + suseconds_t tv_usec __attribute((aligned(8))); /* and microseconds */ }; struct user_itimerval { @@ -183,21 +199,9 @@ struct user_itimerval { struct user_timeval it_value; /* current value */ }; -/* LP64 version of struct timespec. time_t is a long and must grow when - * we're dealing with a 64-bit process. - * WARNING - keep in sync with struct timespec - */ -struct user_timespec { - user_time_t tv_sec; /* seconds */ - int32_t tv_nsec; /* and nanoseconds */ -}; - -#if __DARWIN_ALIGN_NATURAL -#pragma options align=reset #endif - #endif // KERNEL -#endif + #define TIMEVAL_TO_TIMESPEC(tv, ts) { \ (ts)->tv_sec = (tv)->tv_sec; \ diff --git a/bsd/sys/ucontext.h b/bsd/sys/ucontext.h index f231226a2..edb6499db 100644 --- a/bsd/sys/ucontext.h +++ b/bsd/sys/ucontext.h @@ -56,10 +56,6 @@ typedef struct ucontext64 ucontext64_t; #ifdef KERNEL #include /* user_addr_t, user_size_t */ -#if __DARWIN_ALIGN_NATURAL -#pragma options align=natural -#endif - /* kernel representation of struct ucontext64 for 64 bit processes */ struct user_ucontext64 { int uc_onstack; @@ -70,10 +66,6 @@ struct user_ucontext64 { user_addr_t uc_mcontext64; /* machine context */ }; -#if __DARWIN_ALIGN_NATURAL -#pragma options align=reset -#endif - typedef struct user_ucontext64 user_ucontext64_t; #endif /* KERNEL */ diff --git a/bsd/sys/user.h b/bsd/sys/user.h index b90a52481..a304b3d0d 100644 --- a/bsd/sys/user.h +++ b/bsd/sys/user.h @@ -82,15 +82,15 @@ * Per-thread U area. 
*/ struct uthread { - int *uu_ar0; /* address of users saved R0 */ - /* syscall parameters, results and catches */ u_int64_t uu_arg[8]; /* arguments to current system call */ int *uu_ap; /* pointer to arglist */ - int uu_rval[2]; + int uu_rval[2]; /* thread exception handling */ + int uu_exception; int uu_code; /* ``code'' to trap */ + int uu_subcode; char uu_cursig; /* p_cursig for exc. */ /* support for select - across system calls */ struct _select { @@ -151,7 +151,7 @@ struct uthread { struct ucred *uu_ucred; /* per thread credential */ int uu_defer_reclaims; vnode_t uu_vreclaims; - + struct user_sigaltstack uu_sigstk; #ifdef JOE_DEBUG int uu_iocount; int uu_vpindex; @@ -168,6 +168,7 @@ typedef struct uthread * uthread_t; #define UT_CANCEL 0x00000008 /* thread marked for cancel */ #define UT_CANCELED 0x00000010 /* thread cancelled */ #define UT_CANCELDISABLE 0x00000020 /* thread cancel disabled */ +#define UT_ALTSTACK 0x00000040 /* this thread has alt stack for signals */ #define UT_VFORK 0x02000000 /* thread has vfork children */ #define UT_SETUID 0x04000000 /* thread is settugid() */ diff --git a/bsd/sys/vm.h b/bsd/sys/vm.h index 2ff69a04b..1e171d9e1 100644 --- a/bsd/sys/vm.h +++ b/bsd/sys/vm.h @@ -101,27 +101,19 @@ struct vmspace { * WARNING - keep in sync with vmspace */ -#if __DARWIN_ALIGN_NATURAL -#pragma options align=natural -#endif - struct user_vmspace { int vm_refcnt; /* number of references */ - user_addr_t vm_shm; /* SYS5 shared memory private data XXX */ + user_addr_t vm_shm __attribute((aligned(8))); /* SYS5 shared memory private data XXX */ segsz_t vm_rssize; /* current resident set size in pages */ segsz_t vm_swrss; /* resident set size before last swap */ segsz_t vm_tsize; /* text size (pages) XXX */ segsz_t vm_dsize; /* data size (pages) XXX */ segsz_t vm_ssize; /* stack size (pages) */ - user_addr_t vm_taddr; /* user virtual address of text XXX */ + user_addr_t vm_taddr __attribute((aligned(8))); /* user virtual address of text XXX */ 
user_addr_t vm_daddr; /* user virtual address of data XXX */ user_addr_t vm_maxsaddr; /* user VA at max stack growth */ }; -#if __DARWIN_ALIGN_NATURAL -#pragma options align=reset -#endif - #endif /* KERNEL */ #include diff --git a/bsd/sys/vnioctl.h b/bsd/sys/vnioctl.h index 37bb0de23..93812dcac 100644 --- a/bsd/sys/vnioctl.h +++ b/bsd/sys/vnioctl.h @@ -88,20 +88,12 @@ struct vn_ioctl { #ifdef KERNEL_PRIVATE -#if __DARWIN_ALIGN_NATURAL -#pragma options align=natural -#endif - struct user_vn_ioctl { u_int64_t vn_file; /* pathname of file to mount */ int vn_size; /* (returned) size of disk */ vncontrol_t vn_control; }; -#if __DARWIN_ALIGN_NATURAL -#pragma options align=reset -#endif - #endif /* KERNEL_PRIVATE */ /* diff --git a/bsd/sys/vnode_if.h b/bsd/sys/vnode_if.h index aa1201b5f..062cc0fee 100644 --- a/bsd/sys/vnode_if.h +++ b/bsd/sys/vnode_if.h @@ -118,7 +118,6 @@ extern struct vnodeop_desc vnop_truncate_desc; extern struct vnodeop_desc vnop_allocate_desc; extern struct vnodeop_desc vnop_pagein_desc; extern struct vnodeop_desc vnop_pageout_desc; -extern struct vnodeop_desc vnop_devblocksize_desc; extern struct vnodeop_desc vnop_searchfs_desc; extern struct vnodeop_desc vnop_copyfile_desc; extern struct vnodeop_desc vnop_blktooff_desc; @@ -704,18 +703,6 @@ struct vnop_pageout_args { extern errno_t VNOP_PAGEOUT(vnode_t, upl_t, vm_offset_t, off_t, size_t, int, vfs_context_t); -#ifdef BSD_KERNEL_PRIVATE -/* - *#% devblocksize vp = = = - *# - */ -struct vnop_devblocksize_args { - struct vnodeop_desc *a_desc; - vnode_t a_vp; - register_t *a_retval; -}; -#endif /* BSD_KERNEL_PRIVATE */ - /* *# *#% searchfs vp L L L diff --git a/bsd/sys/vnode_internal.h b/bsd/sys/vnode_internal.h index df02742df..aa296e4a0 100644 --- a/bsd/sys/vnode_internal.h +++ b/bsd/sys/vnode_internal.h @@ -297,7 +297,7 @@ extern void * mntvnode_slock; #define VDESC(OP) (& __CONCAT(OP,_desc)) #define VOFFSET(OP) (VDESC(OP)->vdesc_offset) - +struct ostat; int build_path(vnode_t first_vp, char *buff, 
int buflen, int *outlen); int bdevvp(dev_t dev, struct vnode **vpp); @@ -331,6 +331,7 @@ int vn_setxattr(vnode_t, const char *, uio_t, int, vfs_context_t); int vn_removexattr(vnode_t, const char *, int, vfs_context_t); int vn_listxattr(vnode_t, uio_t, size_t *, int, vfs_context_t); +void name_cache_lock_shared(void); void name_cache_lock(void); void name_cache_unlock(void); diff --git a/bsd/ufs/ffs/ffs_alloc.c b/bsd/ufs/ffs/ffs_alloc.c index 275808fd4..f013837cb 100644 --- a/bsd/ufs/ffs/ffs_alloc.c +++ b/bsd/ufs/ffs/ffs_alloc.c @@ -78,7 +78,7 @@ #if REV_ENDIAN_FS #include -#include +#include #endif /* REV_ENDIAN_FS */ extern u_long nextgennumber; @@ -593,7 +593,7 @@ ffs_blkpref(ip, lbn, indx, bap) if (indx && bap) { if (rev_endian) { if (bap != &ip->i_db[0]) - prev = NXSwapLong(bap[indx - 1]); + prev = OSSwapInt32(bap[indx - 1]); else prev = bap[indx - 1]; } else prev = bap[indx - 1]; @@ -651,7 +651,7 @@ ffs_blkpref(ip, lbn, indx, bap) return (nextblk); } if (bap != &ip->i_db[0]) - prev = NXSwapLong(bap[indx - fs->fs_maxcontig]); + prev = OSSwapInt32(bap[indx - fs->fs_maxcontig]); else prev = bap[indx - fs->fs_maxcontig]; if (prev + blkstofrags(fs, fs->fs_maxcontig) != nextblk) diff --git a/bsd/ufs/ffs/ffs_balloc.c b/bsd/ufs/ffs/ffs_balloc.c index 5a0cf7bcf..e8d879344 100644 --- a/bsd/ufs/ffs/ffs_balloc.c +++ b/bsd/ufs/ffs/ffs_balloc.c @@ -81,7 +81,7 @@ #if REV_ENDIAN_FS #include -#include +#include #endif /* REV_ENDIAN_FS */ /* @@ -306,7 +306,7 @@ ffs_balloc( bap = (ufs_daddr_t *)buf_dataptr(bp); #if REV_ENDIAN_FS if (rev_endian) - nb = NXSwapLong(bap[indirs[i].in_off]); + nb = OSSwapInt32(bap[indirs[i].in_off]); else { #endif /* REV_ENDIAN_FS */ nb = bap[indirs[i].in_off]; @@ -344,7 +344,7 @@ ffs_balloc( } #if REV_ENDIAN_FS if (rev_endian) - bap[indirs[i - 1].in_off] = NXSwapLong(nb); + bap[indirs[i - 1].in_off] = OSSwapInt32(nb); else { #endif /* REV_ENDIAN_FS */ bap[indirs[i - 1].in_off] = nb; @@ -375,7 +375,7 @@ ffs_balloc( *allocblk++ = nb; #if REV_ENDIAN_FS 
if (rev_endian) - bap[indirs[i].in_off] = NXSwapLong(nb); + bap[indirs[i].in_off] = OSSwapInt32(nb); else { #endif /* REV_ENDIAN_FS */ bap[indirs[i].in_off] = nb; @@ -579,7 +579,7 @@ ffs_blkalloc( bap = (ufs_daddr_t *)buf_dataptr(bp); #if REV_ENDIAN_FS if (rev_endian) - nb = NXSwapLong(bap[indirs[i].in_off]); + nb = OSSwapInt32(bap[indirs[i].in_off]); else { #endif /* REV_ENDIAN_FS */ nb = bap[indirs[i].in_off]; @@ -617,7 +617,7 @@ ffs_blkalloc( } #if REV_ENDIAN_FS if (rev_endian) - bap[indirs[i - 1].in_off] = NXSwapLong(nb); + bap[indirs[i - 1].in_off] = OSSwapInt32(nb); else { #endif /* REV_ENDIAN_FS */ bap[indirs[i - 1].in_off] = nb; @@ -648,7 +648,7 @@ ffs_blkalloc( *allocblk++ = nb; #if REV_ENDIAN_FS if (rev_endian) - bap[indirs[i].in_off] = NXSwapLong(nb); + bap[indirs[i].in_off] = OSSwapInt32(nb); else { #endif /* REV_ENDIAN_FS */ bap[indirs[i].in_off] = nb; diff --git a/bsd/ufs/ffs/ffs_inode.c b/bsd/ufs/ffs/ffs_inode.c index 4ee62c22b..2887b305c 100644 --- a/bsd/ufs/ffs/ffs_inode.c +++ b/bsd/ufs/ffs/ffs_inode.c @@ -84,7 +84,7 @@ #if REV_ENDIAN_FS #include -#include +#include #endif /* REV_ENDIAN_FS */ static int ffs_indirtrunc(struct inode *, ufs_daddr_t, ufs_daddr_t, @@ -543,7 +543,7 @@ ffs_indirtrunc(ip, lbn, dbn, lastbn, level, countp) i--, nlbn += factor) { #if REV_ENDIAN_FS if (rev_endian) - nb = NXSwapLong(bap[i]); + nb = OSSwapInt32(bap[i]); else { #endif /* REV_ENDIAN_FS */ nb = bap[i]; @@ -569,7 +569,7 @@ ffs_indirtrunc(ip, lbn, dbn, lastbn, level, countp) last = lastbn % factor; #if REV_ENDIAN_FS if (rev_endian) - nb = NXSwapLong(bap[i]); + nb = OSSwapInt32(bap[i]); else { #endif /* REV_ENDIAN_FS */ nb = bap[i]; diff --git a/bsd/ufs/ffs/ffs_subr.c b/bsd/ufs/ffs/ffs_subr.c index 5d466f630..6795bba72 100644 --- a/bsd/ufs/ffs/ffs_subr.c +++ b/bsd/ufs/ffs/ffs_subr.c @@ -76,7 +76,6 @@ #include #if REV_ENDIAN_FS #include -#include #endif /* REV_ENDIAN_FS */ /* diff --git a/bsd/ufs/ffs/ffs_vfsops.c b/bsd/ufs/ffs/ffs_vfsops.c index 06d21d70a..c6186e021 
100644 --- a/bsd/ufs/ffs/ffs_vfsops.c +++ b/bsd/ufs/ffs/ffs_vfsops.c @@ -87,7 +87,7 @@ #include #if REV_ENDIAN_FS #include -#include +#include #endif /* REV_ENDIAN_FS */ int ffs_sbupdate(struct ufsmount *, int); @@ -946,7 +946,7 @@ ffs_vfs_getattr(mp, fsap, context) length = ulp->ul_namelen; #if REV_ENDIAN_FS if (mp->mnt_flag & MNT_REVEND) - length = NXSwapShort(length); + length = OSSwapInt16(length); #endif if (length > 0 && length <= UFS_MAX_LABEL_NAME) { bcopy(ulp->ul_name, fsap->f_vol_name, length); @@ -1074,14 +1074,15 @@ ffs_vfs_setattr(mp, fsap, context) /* Copy new name over existing name */ ulp->ul_namelen = strlen(fsap->f_vol_name); -#if REV_ENDIAN_FS - if (mp->mnt_flag & MNT_REVEND) - ulp->ul_namelen = NXSwapShort(ulp->ul_namelen); -#endif bcopy(fsap->f_vol_name, ulp->ul_name, ulp->ul_namelen); ulp->ul_name[UFS_MAX_LABEL_NAME - 1] = '\0'; ulp->ul_name[ulp->ul_namelen] = '\0'; +#if REV_ENDIAN_FS + if (mp->mnt_flag & MNT_REVEND) + ulp->ul_namelen = OSSwapInt16(ulp->ul_namelen); +#endif + /* Update the checksum */ ulp->ul_checksum = 0; ulp->ul_checksum = ul_cksum(ulp, sizeof(*ulp)); @@ -1423,21 +1424,22 @@ ffs_fhtovp(mp, fhlen, fhp, vpp, context) struct vnode *nvp; struct fs *fs; int error; + ino_t ino; if (fhlen < (int)sizeof(struct ufid)) return (EINVAL); ufhp = (struct ufid *)fhp; fs = VFSTOUFS(mp)->um_fs; - if (ufhp->ufid_ino < ROOTINO || - ufhp->ufid_ino >= fs->fs_ncg * fs->fs_ipg) + ino = ntohl(ufhp->ufid_ino); + if (ino < ROOTINO || ino >= fs->fs_ncg * fs->fs_ipg) return (ESTALE); - error = ffs_vget_internal(mp, ufhp->ufid_ino, &nvp, NULL, NULL, 0, 1); + error = ffs_vget_internal(mp, ino, &nvp, NULL, NULL, 0, 1); if (error) { *vpp = NULLVP; return (error); } ip = VTOI(nvp); - if (ip->i_mode == 0 || ip->i_gen != ufhp->ufid_gen) { + if (ip->i_mode == 0 || ip->i_gen != ntohl(ufhp->ufid_gen)) { vnode_put(nvp); *vpp = NULLVP; return (ESTALE); @@ -1464,8 +1466,8 @@ ffs_vptofh(vp, fhlenp, fhp, context) return (EOVERFLOW); ip = VTOI(vp); ufhp = (struct ufid 
*)fhp; - ufhp->ufid_ino = ip->i_number; - ufhp->ufid_gen = ip->i_gen; + ufhp->ufid_ino = htonl(ip->i_number); + ufhp->ufid_gen = htonl(ip->i_gen); *fhlenp = sizeof(struct ufid); return (0); } @@ -1591,7 +1593,7 @@ ffs_sbupdate(mp, waitfor) * before writing */ if (rev_endian) { - dfs->fs_maxfilesize = NXSwapLongLong(mp->um_savedmaxfilesize); /* XXX */ + dfs->fs_maxfilesize = OSSwapInt64(mp->um_savedmaxfilesize); /* XXX */ } else { #endif /* REV_ENDIAN_FS */ dfs->fs_maxfilesize = mp->um_savedmaxfilesize; /* XXX */ diff --git a/bsd/ufs/ffs/ffs_vnops.c b/bsd/ufs/ffs/ffs_vnops.c index b8dec359e..d33003352 100644 --- a/bsd/ufs/ffs/ffs_vnops.c +++ b/bsd/ufs/ffs/ffs_vnops.c @@ -86,7 +86,6 @@ #include #if REV_ENDIAN_FS #include -#include #endif /* REV_ENDIAN_FS */ #define VOPFUNC int (*)(void *) @@ -167,7 +166,6 @@ struct vnodeopv_entry_desc ffs_specop_entries[] = { { &vnop_pathconf_desc, (VOPFUNC)spec_pathconf }, /* pathconf */ { &vnop_advlock_desc, (VOPFUNC)err_advlock }, /* advlock */ { &vnop_bwrite_desc, (VOPFUNC)vn_bwrite }, - { &vnop_devblocksize_desc, (VOPFUNC)spec_devblocksize }, /* devblocksize */ { &vnop_pagein_desc, (VOPFUNC)ffs_pagein }, /* Pagein */ { &vnop_pageout_desc, (VOPFUNC)ffs_pageout }, /* Pageout */ { &vnop_copyfile_desc, (VOPFUNC)err_copyfile }, /* Copy File */ diff --git a/bsd/ufs/ufs/ufs_attrlist.c b/bsd/ufs/ufs/ufs_attrlist.c index bea11d309..f676c7828 100644 --- a/bsd/ufs/ufs/ufs_attrlist.c +++ b/bsd/ufs/ufs/ufs_attrlist.c @@ -34,7 +34,6 @@ #include #include -#include #include #include #include diff --git a/bsd/ufs/ufs/ufs_bmap.c b/bsd/ufs/ufs/ufs_bmap.c index ca7fd9352..e1652c564 100644 --- a/bsd/ufs/ufs/ufs_bmap.c +++ b/bsd/ufs/ufs/ufs_bmap.c @@ -82,7 +82,7 @@ #include #if REV_ENDIAN_FS #include -#include +#include #endif /* REV_ENDIAN_FS */ @@ -220,7 +220,7 @@ ufs_bmaparray(vp, bn, bnp, ap, nump, runp) daddr = dataptr[xap->in_off]; #if REV_ENDIAN_FS if (rev_endian) - daddr = NXSwapLong(daddr); + daddr = OSSwapInt32(daddr); #endif /* 
REV_ENDIAN_FS */ if (num == 1 && daddr && runp) { #if REV_ENDIAN_FS @@ -228,8 +228,8 @@ ufs_bmaparray(vp, bn, bnp, ap, nump, runp) for (bn = xap->in_off + 1; bn < MNINDIR(ump) && *runp < maxrun && is_sequential(ump, - NXSwapLong(dataptr[bn - 1]), - NXSwapLong(dataptr[bn])); + OSSwapInt32(dataptr[bn - 1]), + OSSwapInt32(dataptr[bn])); ++bn, ++*runp); } else { #endif /* REV_ENDIAN_FS */ diff --git a/bsd/ufs/ufs/ufs_byte_order.c b/bsd/ufs/ufs/ufs_byte_order.c index 86f0b010d..80605b2fb 100644 --- a/bsd/ufs/ufs/ufs_byte_order.c +++ b/bsd/ufs/ufs/ufs_byte_order.c @@ -34,11 +34,11 @@ #include #include #include -#include +#include -#define byte_swap_longlong(thing) ((thing) = NXSwapLongLong(thing)) -#define byte_swap_int(thing) ((thing) = NXSwapLong(thing)) -#define byte_swap_short(thing) ((thing) = NXSwapShort(thing)) +#define byte_swap_longlong(thing) ((thing) = OSSwapInt64(thing)) +#define byte_swap_int(thing) ((thing) = OSSwapInt32(thing)) +#define byte_swap_short(thing) ((thing) = OSSwapInt16(thing)) void byte_swap_longlongs(unsigned long long *array, int count) @@ -230,32 +230,32 @@ byte_swap_inode_in(struct dinode *di, struct inode *ip) { int i; - ip->i_mode = NXSwapShort(di->di_mode); - ip->i_nlink = NXSwapShort(di->di_nlink); - ip->i_oldids[0] = NXSwapShort(di->di_u.oldids[0]); - ip->i_oldids[1] = NXSwapShort(di->di_u.oldids[1]); - ip->i_size = NXSwapLongLong(di->di_size); - ip->i_atime = NXSwapLong(di->di_atime); - ip->i_atimensec = NXSwapLong(di->di_atimensec); - ip->i_mtime = NXSwapLong(di->di_mtime); - ip->i_mtimensec = NXSwapLong(di->di_mtimensec); - ip->i_ctime = NXSwapLong(di->di_ctime); - ip->i_ctimensec = NXSwapLong(di->di_ctimensec); + ip->i_mode = OSSwapInt16(di->di_mode); + ip->i_nlink = OSSwapInt16(di->di_nlink); + ip->i_oldids[0] = OSSwapInt16(di->di_u.oldids[0]); + ip->i_oldids[1] = OSSwapInt16(di->di_u.oldids[1]); + ip->i_size = OSSwapInt64(di->di_size); + ip->i_atime = OSSwapInt32(di->di_atime); + ip->i_atimensec = OSSwapInt32(di->di_atimensec); 
+ ip->i_mtime = OSSwapInt32(di->di_mtime); + ip->i_mtimensec = OSSwapInt32(di->di_mtimensec); + ip->i_ctime = OSSwapInt32(di->di_ctime); + ip->i_ctimensec = OSSwapInt32(di->di_ctimensec); if (((ip->i_mode & IFMT) == IFLNK ) && (ip->i_size <= RESYMLNKLEN)) { bcopy(&di->di_shortlink, &ip->i_shortlink, RESYMLNKLEN); } else { for (i=0; i < NDADDR; i++) /* direct blocks */ - ip->i_db[i] = NXSwapLong(di->di_db[i]); + ip->i_db[i] = OSSwapInt32(di->di_db[i]); for (i=0; i < NIADDR; i++) /* indirect blocks */ - ip->i_ib[i] = NXSwapLong(di->di_ib[i]); + ip->i_ib[i] = OSSwapInt32(di->di_ib[i]); } - ip->i_flags = NXSwapLong(di->di_flags); - ip->i_blocks = NXSwapLong(di->di_blocks); - ip->i_gen = NXSwapLong(di->di_gen); - ip->i_uid = NXSwapLong(di->di_uid); - ip->i_gid = NXSwapLong(di->di_gid); - ip->i_spare[0] = NXSwapLong(di->di_spare[0]); - ip->i_spare[1] = NXSwapLong(di->di_spare[1]); + ip->i_flags = OSSwapInt32(di->di_flags); + ip->i_blocks = OSSwapInt32(di->di_blocks); + ip->i_gen = OSSwapInt32(di->di_gen); + ip->i_uid = OSSwapInt32(di->di_uid); + ip->i_gid = OSSwapInt32(di->di_gid); + ip->i_spare[0] = OSSwapInt32(di->di_spare[0]); + ip->i_spare[1] = OSSwapInt32(di->di_spare[1]); } void @@ -267,32 +267,32 @@ byte_swap_inode_out(struct inode *ip, struct dinode *di) mode = (ip->i_mode & IFMT); inosize = ip->i_size; - di->di_mode = NXSwapShort(ip->i_mode); - di->di_nlink = NXSwapShort(ip->i_nlink); - di->di_u.oldids[0] = NXSwapShort(ip->i_oldids[0]); - di->di_u.oldids[1] = NXSwapShort(ip->i_oldids[1]); - di->di_size = NXSwapLongLong(ip->i_size); - di->di_atime = NXSwapLong(ip->i_atime); - di->di_atimensec = NXSwapLong(ip->i_atimensec); - di->di_mtime = NXSwapLong(ip->i_mtime); - di->di_mtimensec = NXSwapLong(ip->i_mtimensec); - di->di_ctime = NXSwapLong(ip->i_ctime); - di->di_ctimensec = NXSwapLong(ip->i_ctimensec); + di->di_mode = OSSwapInt16(ip->i_mode); + di->di_nlink = OSSwapInt16(ip->i_nlink); + di->di_u.oldids[0] = OSSwapInt16(ip->i_oldids[0]); + di->di_u.oldids[1] = 
OSSwapInt16(ip->i_oldids[1]); + di->di_size = OSSwapInt64(ip->i_size); + di->di_atime = OSSwapInt32(ip->i_atime); + di->di_atimensec = OSSwapInt32(ip->i_atimensec); + di->di_mtime = OSSwapInt32(ip->i_mtime); + di->di_mtimensec = OSSwapInt32(ip->i_mtimensec); + di->di_ctime = OSSwapInt32(ip->i_ctime); + di->di_ctimensec = OSSwapInt32(ip->i_ctimensec); if ((mode == IFLNK) && (inosize <= RESYMLNKLEN)) { bcopy( &ip->i_shortlink, &di->di_shortlink, RESYMLNKLEN); } else { for (i=0; i < NDADDR; i++) /* direct blocks */ - di->di_db[i] = NXSwapLong(ip->i_db[i]); + di->di_db[i] = OSSwapInt32(ip->i_db[i]); for (i=0; i < NIADDR; i++) /* indirect blocks */ - di->di_ib[i] = NXSwapLong(ip->i_ib[i]); + di->di_ib[i] = OSSwapInt32(ip->i_ib[i]); } - di->di_flags = NXSwapLong(ip->i_flags); - di->di_blocks = NXSwapLong(ip->i_blocks); - di->di_gen = NXSwapLong(ip->i_gen); - di->di_uid = NXSwapLong(ip->i_uid); - di->di_gid = NXSwapLong(ip->i_gid); - di->di_spare[0] = NXSwapLong(ip->i_spare[0]); - di->di_spare[1] = NXSwapLong(ip->i_spare[1]); + di->di_flags = OSSwapInt32(ip->i_flags); + di->di_blocks = OSSwapInt32(ip->i_blocks); + di->di_gen = OSSwapInt32(ip->i_gen); + di->di_uid = OSSwapInt32(ip->i_uid); + di->di_gid = OSSwapInt32(ip->i_gid); + di->di_spare[0] = OSSwapInt32(ip->i_spare[0]); + di->di_spare[1] = OSSwapInt32(ip->i_spare[1]); } void diff --git a/bsd/ufs/ufs/ufs_lookup.c b/bsd/ufs/ufs/ufs_lookup.c index 48bbde8c5..884f166c3 100644 --- a/bsd/ufs/ufs/ufs_lookup.c +++ b/bsd/ufs/ufs/ufs_lookup.c @@ -78,10 +78,9 @@ #include #if REV_ENDIAN_FS #include -#include #endif /* REV_ENDIAN_FS */ -extern struct nchstats nchstats; +struct nchstats ufs_nchstats; #if DIAGNOSTIC int dirchk = 1; #else @@ -229,7 +228,7 @@ ufs_lookup(ap) (error = ffs_blkatoff(vdp, (off_t)dp->i_offset, NULL, &bp))) goto out; numdirpasses = 2; - nchstats.ncs_2passes++; + ufs_nchstats.ncs_2passes++; } prevoff = dp->i_offset; endsearch = roundup(dp->i_size, DIRBLKSIZ); @@ -435,7 +434,7 @@ ufs_lookup(ap) found: if 
(numdirpasses == 2) - nchstats.ncs_pass2++; + ufs_nchstats.ncs_pass2++; /* * Check that directory length properly reflects presence * of this entry. diff --git a/bsd/ufs/ufs/ufs_vnops.c b/bsd/ufs/ufs/ufs_vnops.c index 895610932..846a83253 100644 --- a/bsd/ufs/ufs/ufs_vnops.c +++ b/bsd/ufs/ufs/ufs_vnops.c @@ -94,7 +94,6 @@ #if REV_ENDIAN_FS #include -#include #endif /* REV_ENDIAN_FS */ diff --git a/bsd/uxkern/ux_exception.c b/bsd/uxkern/ux_exception.c index e576ad299..678666e39 100644 --- a/bsd/uxkern/ux_exception.c +++ b/bsd/uxkern/ux_exception.c @@ -234,8 +234,12 @@ catch_exception_raise( /* * Send signal. */ - if (ux_signal != 0) - threadsignal(th_act, ux_signal, ucode); + if (ux_signal != 0) { + ut->uu_exception = exception; + //ut->uu_code = code[0]; // filled in by threadsignal + ut->uu_subcode = code[1]; + threadsignal(th_act, ux_signal, code[0]); + } thread_deallocate(th_act); } diff --git a/bsd/vfs/kpi_vfs.c b/bsd/vfs/kpi_vfs.c index 7f72472a9..2aac52ed9 100644 --- a/bsd/vfs/kpi_vfs.c +++ b/bsd/vfs/kpi_vfs.c @@ -96,6 +96,8 @@ #include #include +#include + #include #include @@ -1505,6 +1507,39 @@ current_rootdir(void) return vp; } +/* + * Get a filesec and optional acl contents from an extended attribute. + * Function will attempt to retrieve ACL, UUID, and GUID information using a + * read of a named extended attribute (KAUTH_FILESEC_XATTR). + * + * Parameters: vp The vnode on which to operate. + * fsecp The filesec (and ACL, if any) being + * retrieved. + * ctx The vnode context in which the + * operation is to be attempted. + * + * Returns: 0 Success + * !0 errno value + * + * Notes: The kauth_filesec_t in '*fsecp', if retrieved, will be in + * host byte order, as will be the ACL contents, if any.
+ * Internally, we will canonicalize these values from network (PPC) + * byte order after we retrieve them so that the on-disk contents + * of the extended attribute are identical for both PPC and Intel + * (if we were not being required to provide this service via + * fallback, this would be the job of the filesystem + * 'VNOP_GETATTR' call). + * + * We use ntohl() because it has a transitive property on Intel + * machines and no effect on PPC machines. This guarantees us + * + * XXX: Deleting rather than ignoring a corrupt security structure is + * probably the only way to reset it without assistance from a + * file system integrity checking tool. Right now we ignore it. + * + * XXX: We should enumerate the possible errno values here, and where + * in the code they originated. + */ static int vnode_get_filesec(vnode_t vp, kauth_filesec_t *fsecp, vfs_context_t ctx) { @@ -1513,6 +1548,9 @@ vnode_get_filesec(vnode_t vp, kauth_filesec_t *fsecp, vfs_context_t ctx) size_t fsec_size; size_t xsize, rsize; int error; + int i; + uint32_t host_fsec_magic; + uint32_t host_acl_entrycount; fsec = NULL; fsec_uio = NULL; @@ -1556,28 +1594,38 @@ vnode_get_filesec(vnode_t vp, kauth_filesec_t *fsecp, vfs_context_t ctx) } /* - * Validate security structure. If it's corrupt, we will - * just ignore it. + * Validate security structure; the validation must take place in host + * byte order. If it's corrupt, we will just ignore it.
*/ + + /* Validate the size before trying to convert it */ if (rsize < KAUTH_FILESEC_SIZE(0)) { KAUTH_DEBUG("ACL - DATA TOO SMALL (%d)", rsize); goto out; } - if (fsec->fsec_magic != KAUTH_FILESEC_MAGIC) { - KAUTH_DEBUG("ACL - BAD MAGIC %x", fsec->fsec_magic); - goto out; - } - if ((fsec->fsec_acl.acl_entrycount != KAUTH_FILESEC_NOACL) && - (fsec->fsec_acl.acl_entrycount > KAUTH_ACL_MAX_ENTRIES)) { - KAUTH_DEBUG("ACL - BAD ENTRYCOUNT %x", fsec->fsec_entrycount); + + /* Validate the magic number before trying to convert it */ + host_fsec_magic = ntohl(KAUTH_FILESEC_MAGIC); + if (fsec->fsec_magic != host_fsec_magic) { + KAUTH_DEBUG("ACL - BAD MAGIC %x", host_fsec_magic); goto out; } - if ((fsec->fsec_acl.acl_entrycount != KAUTH_FILESEC_NOACL) && - (KAUTH_FILESEC_SIZE(fsec->fsec_acl.acl_entrycount) > rsize)) { - KAUTH_DEBUG("ACL - BUFFER OVERFLOW (%d entries too big for %d)", fsec->fsec_acl.acl_entrycount, rsize); - goto out; + + /* Validate the entry count before trying to convert it. */ + host_acl_entrycount = ntohl(fsec->fsec_acl.acl_entrycount); + if (host_acl_entrycount != KAUTH_FILESEC_NOACL) { + if (host_acl_entrycount > KAUTH_ACL_MAX_ENTRIES) { + KAUTH_DEBUG("ACL - BAD ENTRYCOUNT %x", host_acl_entrycount); + goto out; + } + if (KAUTH_FILESEC_SIZE(host_acl_entrycount) > rsize) { + KAUTH_DEBUG("ACL - BUFFER OVERFLOW (%d entries too big for %d)", host_acl_entrycount, rsize); + goto out; + } } + kauth_filesec_acl_setendian(KAUTH_ENDIAN_HOST, fsec, NULL); + *fsecp = fsec; fsec = NULL; error = 0; @@ -1591,11 +1639,44 @@ vnode_get_filesec(vnode_t vp, kauth_filesec_t *fsecp, vfs_context_t ctx) return(error); } +/* + * Set a filesec and optional acl contents into an extended attribute. + * function will attempt to store ACL, UUID, and GUID information using a + * write to a named extended attribute (KAUTH_FILESEC_XATTR). The 'acl' + * may or may not point to the `fsec->fsec_acl`, depending on whether the + * original caller supplied an acl. 
+ * + * Parameters: vp The vnode on which to operate. + * fsec The filesec being set. + * acl The acl to be associated with 'fsec'. + * ctx The vnode context in which the + * operation is to be attempted. + * + * Returns: 0 Success + * !0 errno value + * + * Notes: Both the fsec and the acl are always valid. + * + * The kauth_filesec_t in 'fsec', if any, is in host byte order, + * as are the acl contents, if they are used. Internally, we will + * cannonize these values into network (PPC) byte order before we + * attempt to write them so that the on-disk contents of the + * extended attribute are identical for both PPC and Intel (if we + * were not being required to provide this service via fallback, + * this would be the job of the filesystem 'VNOP_SETATTR' call). + * We reverse this process on the way out, so we leave with the + * same byte order we started with. + * + * XXX: We should enummerate the possible errno values here, and where + * in the code they originated. + */ static int vnode_set_filesec(vnode_t vp, kauth_filesec_t fsec, kauth_acl_t acl, vfs_context_t ctx) { - uio_t fsec_uio; - int error; + uio_t fsec_uio; + int error; + int i; + uint32_t saved_acl_copysize; fsec_uio = NULL; @@ -1604,8 +1685,16 @@ vnode_set_filesec(vnode_t vp, kauth_filesec_t fsec, kauth_acl_t acl, vfs_context error = ENOMEM; goto out; } + /* + * Save the pre-converted ACL copysize, because it gets swapped too + * if we are running with the wrong endianness. 
+ */ + saved_acl_copysize = KAUTH_ACL_COPYSIZE(acl); + + kauth_filesec_acl_setendian(KAUTH_ENDIAN_DISK, fsec, acl); + uio_addiov(fsec_uio, CAST_USER_ADDR_T(fsec), sizeof(struct kauth_filesec) - sizeof(struct kauth_acl)); - uio_addiov(fsec_uio, CAST_USER_ADDR_T(acl), KAUTH_ACL_COPYSIZE(acl)); + uio_addiov(fsec_uio, CAST_USER_ADDR_T(acl), saved_acl_copysize); error = vn_setxattr(vp, KAUTH_FILESEC_XATTR, fsec_uio, @@ -1613,6 +1702,8 @@ vnode_set_filesec(vnode_t vp, kauth_filesec_t fsec, kauth_acl_t acl, vfs_context ctx); VFS_DEBUG(ctx, vp, "SETATTR - set ACL returning %d", error); + kauth_filesec_acl_setendian(KAUTH_ENDIAN_HOST, fsec, acl); + out: if (fsec_uio != NULL) uio_free(fsec_uio); @@ -1826,6 +1917,28 @@ vnode_getattr(vnode_t vp, struct vnode_attr *vap, vfs_context_t ctx) return(error); } +/* + * Set the attributes on a vnode in a vnode context. + * + * Parameters: vp The vnode whose attributes to set. + * vap A pointer to the attributes to set. + * ctx The vnode context in which the + * operation is to be attempted. + * + * Returns: 0 Success + * !0 errno value + * + * Notes: The kauth_filesec_t in 'vap', if any, is in host byte order. + * + * The contents of the data area pointed to by 'vap' may be + * modified if the vnode is on a filesystem which has been + * mounted with ingore ownership flags, or by the underlyng + * VFS itself, or by the fallback code, if the underlying VFS + * does not support ACL, UUID, or GUUID attributes directly. + * + * XXX: We should enummerate the possible errno values here, and where + * in the code they originated. + */ int vnode_setattr(vnode_t vp, struct vnode_attr *vap, vfs_context_t ctx) { @@ -1835,8 +1948,10 @@ vnode_setattr(vnode_t vp, struct vnode_attr *vap, vfs_context_t ctx) * Make sure the filesystem is mounted R/W. * If not, return an error. 
*/ - if (vfs_isrdonly(vp->v_mount)) - return(EROFS); + if (vfs_isrdonly(vp->v_mount)) { + error = EROFS; + goto out; + } /* * If ownership is being ignored on this volume, we silently discard @@ -1858,7 +1973,8 @@ vnode_setattr(vnode_t vp, struct vnode_attr *vap, vfs_context_t ctx) if (!vfs_extendedsecurity(vnode_mount(vp)) && (VATTR_IS_ACTIVE(vap, va_acl) || VATTR_IS_ACTIVE(vap, va_uuuid) || VATTR_IS_ACTIVE(vap, va_guuid))) { KAUTH_DEBUG("SETATTR - returning ENOTSUP to request to set extended security"); - return(ENOTSUP); + error = ENOTSUP; + goto out; } error = VNOP_SETATTR(vp, vap, ctx); @@ -1889,13 +2005,35 @@ vnode_setattr(vnode_t vp, struct vnode_attr *vap, vfs_context_t ctx) add_fsevent(FSE_CHOWN, ctx, FSE_ARG_VNODE, vp, FSE_ARG_DONE); } } + +out: return(error); } /* - * Following an operation which sets attributes (setattr, create, etc.) we may - * need to perform fallback operations to get attributes saved. - */ + * Fallback for setting the attributes on a vnode in a vnode context. This + * Function will attempt to store ACL, UUID, and GUID information utilizing + * a read/modify/write operation against an EA used as a backing store for + * the object. + * + * Parameters: vp The vnode whose attributes to set. + * vap A pointer to the attributes to set. + * ctx The vnode context in which the + * operation is to be attempted. + * + * Returns: 0 Success + * !0 errno value + * + * Notes: The kauth_filesec_t in 'vap', if any, is in host byte order, + * as are the fsec and lfsec, if they are used. + * + * The contents of the data area pointed to by 'vap' may be + * modified to indicate that the attribute is supported for + * any given requested attribute. + * + * XXX: We should enummerate the possible errno values here, and where + * in the code they originated. 
+ */ int vnode_setattr_fallback(vnode_t vp, struct vnode_attr *vap, vfs_context_t ctx) { @@ -1909,7 +2047,8 @@ vnode_setattr_fallback(vnode_t vp, struct vnode_attr *vap, vfs_context_t ctx) /* * Extended security fallback via extended attributes. * - * Note that we do not free the filesec; the caller is expected to do this. + * Note that we do not free the filesec; the caller is expected to + * do this. */ if (VATTR_NOT_RETURNED(vap, va_acl) || VATTR_NOT_RETURNED(vap, va_uuuid) || @@ -1917,7 +2056,8 @@ vnode_setattr_fallback(vnode_t vp, struct vnode_attr *vap, vfs_context_t ctx) VFS_DEBUG(ctx, vp, "SETATTR - doing filesec fallback"); /* - * Fail for file types that we don't permit extended security to be set on. + * Fail for file types that we don't permit extended security + * to be set on. */ if ((vp->v_type != VDIR) && (vp->v_type != VLNK) && (vp->v_type != VREG)) { VFS_DEBUG(ctx, vp, "SETATTR - Can't write ACL to file type %d", vnode_vtype(vp)); @@ -1926,8 +2066,9 @@ vnode_setattr_fallback(vnode_t vp, struct vnode_attr *vap, vfs_context_t ctx) } /* - * If we don't have all the extended security items, we need to fetch the existing - * data to perform a read-modify-write operation. + * If we don't have all the extended security items, we need + * to fetch the existing data to perform a read-modify-write + * operation. */ fsec = NULL; if (!VATTR_IS_ACTIVE(vap, va_acl) || @@ -1982,7 +2123,8 @@ vnode_setattr_fallback(vnode_t vp, struct vnode_attr *vap, vfs_context_t ctx) } /* - * If the filesec data is all invalid, we can just remove the EA completely. + * If the filesec data is all invalid, we can just remove + * the EA completely. 
*/ if ((facl->acl_entrycount == KAUTH_FILESEC_NOACL) && kauth_guid_equal(&fsec->fsec_owner, &kauth_null_guid) && diff --git a/bsd/vfs/vfs_bio.c b/bsd/vfs/vfs_bio.c index 5371c4b3a..8f6f0633e 100644 --- a/bsd/vfs/vfs_bio.c +++ b/bsd/vfs/vfs_bio.c @@ -215,7 +215,7 @@ buf_timestamp(void) int lru_is_stale = LRU_IS_STALE; int age_is_stale = AGE_IS_STALE; int meta_is_stale = META_IS_STALE; - +static int boot_nbuf = 0; /* LIST_INSERT_HEAD() with assertions */ @@ -233,15 +233,19 @@ blistenterhead(struct bufhashhdr * head, buf_t bp) static __inline__ void binshash(buf_t bp, struct bufhashhdr *dp) { +#if DIAGNOSTIC buf_t nbp; +#endif /* DIAGNOSTIC */ BHASHENTCHECK(bp); +#if DIAGNOSTIC nbp = dp->lh_first; for(; nbp != NULL; nbp = nbp->b_hash.le_next) { if(nbp == bp) panic("buf already in hashlist"); } +#endif /* DIAGNOSTIC */ blistenterhead(dp, bp); } @@ -1371,15 +1375,17 @@ bufinit() int metabuf; long whichq; + nbuf = 0; /* Initialize the buffer queues ('freelists') and the hash table */ for (dp = bufqueues; dp < &bufqueues[BQUEUES]; dp++) TAILQ_INIT(dp); - bufhashtbl = hashinit(nbuf, M_CACHE, &bufhash); + bufhashtbl = hashinit(nbuf_hashelements, M_CACHE, &bufhash); - metabuf = nbuf/8; /* reserved for meta buf */ + metabuf = max_nbuf_headers/8; /* reserved for meta buf */ /* Initialize the buffer headers */ - for (i = 0; i < nbuf; i++) { + for (i = 0; i < max_nbuf_headers; i++) { + nbuf++; bp = &buf[i]; bufhdrinit(bp); @@ -1398,24 +1404,24 @@ bufinit() binshash(bp, &invalhash); } + boot_nbuf = nbuf; + for (; i < nbuf + niobuf; i++) { bp = &buf[i]; bufhdrinit(bp); binsheadfree(bp, &iobufqueue, -1); } - /* + /* * allocate lock group attribute and group */ - buf_mtx_grp_attr = lck_grp_attr_alloc_init(); - //lck_grp_attr_setstat(buf_mtx_grp_attr); + buf_mtx_grp_attr = lck_grp_attr_alloc_init(); buf_mtx_grp = lck_grp_alloc_init("buffer cache", buf_mtx_grp_attr); /* * allocate the lock attribute */ buf_mtx_attr = lck_attr_alloc_init(); - //lck_attr_setdebug(buf_mtx_attr); /* * 
allocate and initialize mutex's for the buffer and iobuffer pools @@ -2491,7 +2497,7 @@ allocbuf(buf_t bp, int size) bp->b_datap = (uintptr_t)zalloc(z); } else { bp->b_datap = (uintptr_t)NULL; - kmem_alloc(kernel_map, (vm_offset_t *)&bp->b_datap, desired_size); + kmem_alloc_wired(kernel_map, (vm_offset_t *)&bp->b_datap, desired_size); CLR(bp->b_flags, B_ZALLOC); } bcopy((void *)elem, (caddr_t)bp->b_datap, bp->b_bufsize); @@ -2504,7 +2510,7 @@ allocbuf(buf_t bp, int size) if ((vm_size_t)bp->b_bufsize < desired_size) { /* reallocate to a bigger size */ bp->b_datap = (uintptr_t)NULL; - kmem_alloc(kernel_map, (vm_offset_t *)&bp->b_datap, desired_size); + kmem_alloc_wired(kernel_map, (vm_offset_t *)&bp->b_datap, desired_size); bcopy((const void *)elem, (caddr_t)bp->b_datap, bp->b_bufsize); kmem_free(kernel_map, elem, bp->b_bufsize); } else { @@ -2519,7 +2525,7 @@ allocbuf(buf_t bp, int size) bp->b_datap = (uintptr_t)zalloc(z); SET(bp->b_flags, B_ZALLOC); } else - kmem_alloc(kernel_map, (vm_offset_t *)&bp->b_datap, desired_size); + kmem_alloc_wired(kernel_map, (vm_offset_t *)&bp->b_datap, desired_size); } } bp->b_bufsize = desired_size; @@ -2567,6 +2573,16 @@ getnewbuf(int slpflag, int slptimeo, int * queue) if ((*queue > BQUEUES) || (*queue < 0) || (*queue == BQ_LAUNDRY) || (*queue == BQ_LOCKED)) *queue = BQ_EMPTY; + /* need to grow number of bufs, add another one rather than recycling */ + if (nbuf < max_nbuf_headers) { + /* + * Increment count now as lock + * is dropped for allocation. + * That avoids over commits + */ + nbuf++; + goto add_newbufs; + } /* * (*queue == BQUEUES) means no preference @@ -2593,6 +2609,13 @@ getnewbuf(int slpflag, int slptimeo, int * queue) *queue = BQ_EMPTY; goto found; } + /* + * We have seen is this is hard to trigger. 
+ * This is an overcommit of nbufs but needed + * in some scenarios with diskiamges + */ + +add_newbufs: lck_mtx_unlock(buf_mtxp); /* Create a new temporary buffer header */ @@ -2610,6 +2633,9 @@ getnewbuf(int slpflag, int slptimeo, int * queue) buf_hdr_count++; goto found; } + /* subtract already accounted bufcount */ + nbuf--; + bufstats.bufs_sleeps++; /* wait for a free buffer of any kind */ @@ -2619,7 +2645,6 @@ getnewbuf(int slpflag, int slptimeo, int * queue) /* the hz value is 100; which leads to 10ms */ ts.tv_nsec = (slptimeo % 1000) * NSEC_PER_USEC * 1000 * 10; msleep(&needbuffer, buf_mtxp, slpflag|(PRIBIO+1), (char *)"getnewbuf", &ts); - return (0); } @@ -3113,9 +3138,12 @@ count_busy_buffers(void) buf_t bp; int nbusy = 0; - for (bp = &buf[nbuf]; --bp >= buf; ) + lck_mtx_lock(buf_mtxp); + for (bp = &buf[boot_nbuf]; --bp >= buf; ) if (!ISSET(bp->b_flags, B_INVAL) && ISSET(bp->b_lflags, BL_BUSY)) nbusy++; + lck_mtx_unlock(buf_mtxp); + return (nbusy); } diff --git a/bsd/vfs/vfs_cache.c b/bsd/vfs/vfs_cache.c index fe7fe5269..e29ea4819 100644 --- a/bsd/vfs/vfs_cache.c +++ b/bsd/vfs/vfs_cache.c @@ -96,15 +96,37 @@ u_long nchash; /* size of hash table - 1 */ long numcache; /* number of cache entries allocated */ int desiredNodes; int desiredNegNodes; +int ncs_negtotal; TAILQ_HEAD(, namecache) nchead; /* chain of all name cache entries */ TAILQ_HEAD(, namecache) neghead; /* chain of only negative cache entries */ + + +#if COLLECT_STATS + struct nchstats nchstats; /* cache effectiveness statistics */ +#define NCHSTAT(v) { \ + nchstats.v++; \ +} +#define NAME_CACHE_LOCK() name_cache_lock() +#define NAME_CACHE_UNLOCK() name_cache_unlock() +#define NAME_CACHE_LOCK_SHARED() name_cache_lock() + +#else + +#define NCHSTAT(v) +#define NAME_CACHE_LOCK() name_cache_lock() +#define NAME_CACHE_UNLOCK() name_cache_unlock() +#define NAME_CACHE_LOCK_SHARED() name_cache_lock_shared() + +#endif + + /* vars for name cache list lock */ lck_grp_t * namecache_lck_grp; lck_grp_attr_t * 
namecache_lck_grp_attr; lck_attr_t * namecache_lck_attr; -lck_mtx_t * namecache_mtx_lock; +lck_rw_t * namecache_rw_lock; static vnode_t cache_lookup_locked(vnode_t dvp, struct componentname *cnp); static int remove_name_locked(const char *); @@ -157,7 +179,7 @@ build_path(vnode_t first_vp, char *buff, int buflen, int *outlen) vp = vp->v_mount->mnt_vnodecovered; } } - name_cache_lock(); + NAME_CACHE_LOCK_SHARED(); while (vp && vp->v_parent != vp) { /* @@ -218,7 +240,7 @@ build_path(vnode_t first_vp, char *buff, int buflen, int *outlen) vp = vp->v_mount->mnt_vnodecovered; } } - name_cache_unlock(); + NAME_CACHE_UNLOCK(); out: /* * slide it down to the beginning of the buffer @@ -242,7 +264,7 @@ vnode_getparent(vnode_t vp) vnode_t pvp = NULLVP; int pvid; - name_cache_lock(); + NAME_CACHE_LOCK_SHARED(); /* * v_parent is stable behind the name_cache lock * however, the only thing we can really guarantee @@ -253,13 +275,12 @@ vnode_getparent(vnode_t vp) if ( (pvp = vp->v_parent) != NULLVP ) { pvid = pvp->v_id; - name_cache_unlock(); + NAME_CACHE_UNLOCK(); if (vnode_getwithvid(pvp, pvid) != 0) pvp = NULL; } else - name_cache_unlock(); - + NAME_CACHE_UNLOCK(); return (pvp); } @@ -268,11 +289,11 @@ vnode_getname(vnode_t vp) { char *name = NULL; - name_cache_lock(); + NAME_CACHE_LOCK(); if (vp->v_name) name = add_name_locked(vp->v_name, strlen(vp->v_name), 0, 0); - name_cache_unlock(); + NAME_CACHE_UNLOCK(); return (name); } @@ -280,11 +301,11 @@ vnode_getname(vnode_t vp) void vnode_putname(char *name) { - name_cache_lock(); + NAME_CACHE_LOCK(); remove_name_locked(name); - name_cache_unlock(); + NAME_CACHE_UNLOCK(); } @@ -313,7 +334,7 @@ vnode_update_identity(vnode_t vp, vnode_t dvp, char *name, int name_len, int nam dvp = NULLVP; } else dvp = NULLVP; - name_cache_lock(); + NAME_CACHE_LOCK(); if ( (flags & VNODE_UPDATE_NAME) && (name != vp->v_name) ) { if (vp->v_name != NULL) { @@ -340,7 +361,7 @@ vnode_update_identity(vnode_t vp, vnode_t dvp, char *name, int name_len, int 
nam while ( (ncp = LIST_FIRST(&vp->v_nclinks)) ) cache_delete(ncp, 1); } - name_cache_unlock(); + NAME_CACHE_UNLOCK(); if (dvp != NULLVP) vnode_rele(dvp); @@ -393,10 +414,10 @@ vnode_update_identity(vnode_t vp, vnode_t dvp, char *name, int name_len, int nam * vnode_reclaim for each of the vnodes in the uu_vreclaims * list, we won't recurse back through here */ - name_cache_lock(); + NAME_CACHE_LOCK(); old_parentvp = vp->v_parent; vp->v_parent = NULLVP; - name_cache_unlock(); + NAME_CACHE_UNLOCK(); } else { /* * we're done... we ran into a vnode that isn't @@ -515,7 +536,7 @@ reverse_lookup(vnode_t start_vp, vnode_t *lookup_vpp, struct filedesc *fdp, vfs_ ucred = vfs_context_ucred(context); *lookup_vpp = start_vp; - name_cache_lock(); + NAME_CACHE_LOCK_SHARED(); if ( dp->v_mount && (dp->v_mount->mnt_kern_flag & MNTK_AUTH_OPAQUE) ) { auth_opaque = 1; @@ -551,7 +572,7 @@ reverse_lookup(vnode_t start_vp, vnode_t *lookup_vpp, struct filedesc *fdp, vfs_ vid = dp->v_id; - name_cache_unlock(); + NAME_CACHE_UNLOCK(); if (done == 0 && dp != start_vp) { if (vnode_getwithvid(dp, vid) != 0) { @@ -577,8 +598,7 @@ cache_lookup_path(struct nameidata *ndp, struct componentname *cnp, vnode_t dp, ucred = vfs_context_ucred(context); *trailing_slash = 0; - name_cache_lock(); - + NAME_CACHE_LOCK_SHARED(); if ( dp->v_mount && (dp->v_mount->mnt_kern_flag & MNTK_AUTH_OPAQUE) ) { auth_opaque = 1; @@ -708,7 +728,7 @@ cache_lookup_path(struct nameidata *ndp, struct componentname *cnp, vnode_t dp, vvid = vp->v_id; vid = dp->v_id; - name_cache_unlock(); + NAME_CACHE_UNLOCK(); if ((vp != NULLVP) && (vp->v_type != VLNK) && @@ -794,11 +814,14 @@ cache_lookup_locked(vnode_t dvp, struct componentname *cnp) break; } } - if (ncp == 0) + if (ncp == 0) { /* * We failed to find an entry */ + NCHSTAT(ncs_miss); return (NULL); + } + NCHSTAT(ncs_goodhits); vp = ncp->nc_vp; if (vp && (vp->v_flag & VISHARDLINK)) { @@ -867,12 +890,14 @@ cache_lookup(dvp, vpp, cnp) register long namelen = cnp->cn_namelen; char 
*nameptr = cnp->cn_nameptr; unsigned int hashval = (cnp->cn_hash & NCHASHMASK); + boolean_t have_exclusive = FALSE; uint32_t vid; vnode_t vp; - name_cache_lock(); + NAME_CACHE_LOCK_SHARED(); ncpp = NCHHASH(dvp, cnp->cn_hash); +relook: LIST_FOREACH(ncp, ncpp, nc_hash) { if ((ncp->nc_dvp == dvp) && (ncp->nc_hashval == hashval)) { if (memcmp(ncp->nc_name, nameptr, namelen) == 0 && ncp->nc_name[namelen] == 0) @@ -881,31 +906,39 @@ cache_lookup(dvp, vpp, cnp) } /* We failed to find an entry */ if (ncp == 0) { - nchstats.ncs_miss++; - name_cache_unlock(); + NCHSTAT(ncs_miss); + NAME_CACHE_UNLOCK(); return (0); } /* We don't want to have an entry, so dump it */ if ((cnp->cn_flags & MAKEENTRY) == 0) { - nchstats.ncs_badhits++; - cache_delete(ncp, 1); - name_cache_unlock(); - return (0); + if (have_exclusive == TRUE) { + NCHSTAT(ncs_badhits); + cache_delete(ncp, 1); + NAME_CACHE_UNLOCK(); + return (0); + } + NAME_CACHE_UNLOCK(); + NAME_CACHE_LOCK(); + have_exclusive = TRUE; + goto relook; } vp = ncp->nc_vp; /* We found a "positive" match, return the vnode */ if (vp) { - nchstats.ncs_goodhits++; + NCHSTAT(ncs_goodhits); vid = vp->v_id; - name_cache_unlock(); + NAME_CACHE_UNLOCK(); if (vnode_getwithvid(vp, vid)) { - name_cache_lock(); - nchstats.ncs_badvid++; - name_cache_unlock(); +#if COLLECT_STATS + NAME_CACHE_LOCK(); + NCHSTAT(ncs_badvid); + NAME_CACHE_UNLOCK(); +#endif return (0); } *vpp = vp; @@ -914,21 +947,27 @@ cache_lookup(dvp, vpp, cnp) /* We found a negative match, and want to create it, so purge */ if (cnp->cn_nameiop == CREATE || cnp->cn_nameiop == RENAME) { - nchstats.ncs_badhits++; - cache_delete(ncp, 1); - name_cache_unlock(); - return (0); + if (have_exclusive == TRUE) { + NCHSTAT(ncs_badhits); + cache_delete(ncp, 1); + NAME_CACHE_UNLOCK(); + return (0); + } + NAME_CACHE_UNLOCK(); + NAME_CACHE_LOCK(); + have_exclusive = TRUE; + goto relook; } /* * We found a "negative" match, ENOENT notifies client of this match. 
* The nc_whiteout field records whether this is a whiteout. */ - nchstats.ncs_neghits++; + NCHSTAT(ncs_neghits); if (ncp->nc_whiteout) cnp->cn_flags |= ISWHITEOUT; - name_cache_unlock(); + NAME_CACHE_UNLOCK(); return (ENOENT); } @@ -947,7 +986,7 @@ cache_enter(dvp, vp, cnp) if (cnp->cn_hash == 0) cnp->cn_hash = hash_string(cnp->cn_nameptr, cnp->cn_namelen); - name_cache_lock(); + NAME_CACHE_LOCK(); /* if the entry is for -ve caching vp is null */ if ((vp != NULLVP) && (LIST_FIRST(&vp->v_nclinks))) { @@ -955,8 +994,8 @@ cache_enter(dvp, vp, cnp) * someone beat us to the punch.. * this vnode is already in the cache */ - name_cache_unlock(); - return; + NAME_CACHE_UNLOCK(); + return; } /* * We allocate a new entry if we are less than the maximum @@ -983,11 +1022,11 @@ cache_enter(dvp, vp, cnp) * still in use... we need to * delete it before re-using it */ - nchstats.ncs_stolen++; + NCHSTAT(ncs_stolen); cache_delete(ncp, 0); } } - nchstats.ncs_enters++; + NCHSTAT(ncs_enters); /* * Fill in cache info, if vp is NULL this is a "negative" cache entry. 
@@ -1035,9 +1074,9 @@ cache_enter(dvp, vp, cnp) if (cnp->cn_flags & ISWHITEOUT) ncp->nc_whiteout = TRUE; - nchstats.ncs_negtotal++; + ncs_negtotal++; - if (nchstats.ncs_negtotal > desiredNegNodes) { + if (ncs_negtotal > desiredNegNodes) { /* * if we've reached our desired limit * of negative cache entries, delete @@ -1055,7 +1094,7 @@ cache_enter(dvp, vp, cnp) */ LIST_INSERT_HEAD(&dvp->v_ncchildren, ncp, nc_child); - name_cache_unlock(); + NAME_CACHE_UNLOCK(); } @@ -1111,31 +1150,34 @@ nchinit(void) /* Allocate mount list lock group attribute and group */ namecache_lck_grp_attr= lck_grp_attr_alloc_init(); - lck_grp_attr_setstat(namecache_lck_grp_attr); namecache_lck_grp = lck_grp_alloc_init("Name Cache", namecache_lck_grp_attr); /* Allocate mount list lock attribute */ namecache_lck_attr = lck_attr_alloc_init(); - //lck_attr_setdebug(namecache_lck_attr); /* Allocate mount list lock */ - namecache_mtx_lock = lck_mtx_alloc_init(namecache_lck_grp, namecache_lck_attr); + namecache_rw_lock = lck_rw_alloc_init(namecache_lck_grp, namecache_lck_attr); } +void +name_cache_lock_shared(void) +{ + lck_rw_lock_shared(namecache_rw_lock); +} + void name_cache_lock(void) { - lck_mtx_lock(namecache_mtx_lock); + lck_rw_lock_exclusive(namecache_rw_lock); } void name_cache_unlock(void) { - lck_mtx_unlock(namecache_mtx_lock); - + lck_rw_done(namecache_rw_lock); } @@ -1164,7 +1206,7 @@ resize_namecache(u_int newsize) return ENOMEM; } - name_cache_lock(); + NAME_CACHE_LOCK(); // do the switch! 
old_table = nchashtbl; nchashtbl = new_table; @@ -1192,7 +1234,7 @@ resize_namecache(u_int newsize) desiredNodes = dNodes; desiredNegNodes = dNegNodes; - name_cache_unlock(); + NAME_CACHE_UNLOCK(); FREE(old_table, M_CACHE); return 0; @@ -1201,13 +1243,13 @@ resize_namecache(u_int newsize) static void cache_delete(struct namecache *ncp, int age_entry) { - nchstats.ncs_deletes++; + NCHSTAT(ncs_deletes); if (ncp->nc_vp) { LIST_REMOVE(ncp, nc_un.nc_link); } else { TAILQ_REMOVE(&neghead, ncp, nc_un.nc_negentry); - nchstats.ncs_negtotal--; + ncs_negtotal--; } LIST_REMOVE(ncp, nc_child); @@ -1245,7 +1287,7 @@ cache_purge(vnode_t vp) if ((LIST_FIRST(&vp->v_nclinks) == NULL) && (LIST_FIRST(&vp->v_ncchildren) == NULL)) return; - name_cache_lock(); + NAME_CACHE_LOCK(); while ( (ncp = LIST_FIRST(&vp->v_nclinks)) ) cache_delete(ncp, 1); @@ -1253,7 +1295,7 @@ cache_purge(vnode_t vp) while ( (ncp = LIST_FIRST(&vp->v_ncchildren)) ) cache_delete(ncp, 1); - name_cache_unlock(); + NAME_CACHE_UNLOCK(); } /* @@ -1268,13 +1310,13 @@ cache_purge_negatives(vnode_t vp) { struct namecache *ncp; - name_cache_lock(); + NAME_CACHE_LOCK(); LIST_FOREACH(ncp, &vp->v_ncchildren, nc_child) if (ncp->nc_vp == NULL) cache_delete(ncp , 1); - name_cache_unlock(); + NAME_CACHE_UNLOCK(); } /* @@ -1290,7 +1332,7 @@ cache_purgevfs(mp) struct nchashhead *ncpp; struct namecache *ncp; - name_cache_lock(); + NAME_CACHE_LOCK(); /* Scan hash tables for applicable entries */ for (ncpp = &nchashtbl[nchash - 1]; ncpp >= nchashtbl; ncpp--) { restart: @@ -1301,7 +1343,7 @@ cache_purgevfs(mp) } } } - name_cache_unlock(); + NAME_CACHE_UNLOCK(); } @@ -1387,9 +1429,9 @@ vfs_addname(const char *name, size_t len, u_int hashval, u_int flags) { char * ptr; - name_cache_lock(); + NAME_CACHE_LOCK(); ptr = add_name_locked(name, len, hashval, flags); - name_cache_unlock(); + NAME_CACHE_UNLOCK(); return(ptr); } @@ -1456,9 +1498,9 @@ vfs_removename(const char *nameref) { int i; - name_cache_lock(); + NAME_CACHE_LOCK(); i = 
remove_name_locked(nameref); - name_cache_unlock(); + NAME_CACHE_UNLOCK(); return(i); @@ -1507,12 +1549,13 @@ dump_string_table(void) string_t *entry; u_long i; - name_cache_lock(); + NAME_CACHE_LOCK_SHARED(); + for (i = 0; i <= string_table_mask; i++) { head = &string_ref_table[i]; for (entry=head->lh_first; entry != NULL; entry=entry->hash_chain.le_next) { printf("%6d - %s\n", entry->refcount, entry->str); } } - name_cache_unlock(); + NAME_CACHE_UNLOCK(); } diff --git a/bsd/vfs/vfs_cluster.c b/bsd/vfs/vfs_cluster.c index 69dd98910..e2b4b14d0 100644 --- a/bsd/vfs/vfs_cluster.c +++ b/bsd/vfs/vfs_cluster.c @@ -156,18 +156,16 @@ struct timeval priority_IO_timestamp_for_root; void cluster_init(void) { - /* + /* * allocate lock group attribute and group */ - cl_mtx_grp_attr = lck_grp_attr_alloc_init(); - //lck_grp_attr_setstat(cl_mtx_grp_attr); + cl_mtx_grp_attr = lck_grp_attr_alloc_init(); cl_mtx_grp = lck_grp_alloc_init("cluster I/O", cl_mtx_grp_attr); /* * allocate the lock attribute */ cl_mtx_attr = lck_attr_alloc_init(); - //lck_attr_setdebug(clf_mtx_attr); /* * allocate and initialize mutex's used to protect updates and waits diff --git a/bsd/vfs/vfs_fsevents.c b/bsd/vfs/vfs_fsevents.c index 5949b796a..3688244ef 100644 --- a/bsd/vfs/vfs_fsevents.c +++ b/bsd/vfs/vfs_fsevents.c @@ -41,7 +41,6 @@ #include #include #include -#include #include #include @@ -88,6 +87,7 @@ typedef struct fs_event_watcher { int32_t eventq_size; // number of event pointers in queue int32_t rd, wr; // indices to the event_queue int32_t blockers; + int32_t num_readers; } fs_event_watcher; // fs_event_watcher flags @@ -249,7 +249,7 @@ add_fsevent(int type, vfs_context_t ctx, ...) kfs_event *kfse; fs_event_watcher *watcher; va_list ap; - int error = 0; + int error = 0, base; dev_t dev = 0; va_start(ap, ctx); @@ -267,8 +267,9 @@ add_fsevent(int type, vfs_context_t ctx, ...) // the lock is dropped. 
lock_fs_event_buf(); + base = free_event_idx; for(i=0; i < MAX_KFS_EVENTS; i++) { - if (fs_event_buf[(free_event_idx + i) % MAX_KFS_EVENTS].type == FSE_INVALID) { + if (fs_event_buf[(base + i) % MAX_KFS_EVENTS].type == FSE_INVALID) { break; } } @@ -290,12 +291,12 @@ add_fsevent(int type, vfs_context_t ctx, ...) return ENOSPC; } - kfse = &fs_event_buf[(free_event_idx + i) % MAX_KFS_EVENTS]; + kfse = &fs_event_buf[(base + i) % MAX_KFS_EVENTS]; - free_event_idx++; + free_event_idx = ((base + i) % MAX_KFS_EVENTS) + 1; kfse->type = type; - kfse->refcount = 0; + kfse->refcount = 1; kfse->pid = p->p_pid; unlock_fs_event_buf(); // at this point it's safe to unlock @@ -462,9 +463,8 @@ add_fsevent(int type, vfs_context_t ctx, ...) clean_up: // just in case no one was interested after all... - if (num_deliveries == 0) { + if (OSAddAtomic(-1, (SInt32 *)&kfse->refcount) == 1) { do_free_event(kfse); - free_event_idx = (int)(kfse - &fs_event_buf[0]); } lck_rw_done(&fsevent_big_lock); @@ -479,8 +479,10 @@ do_free_event(kfs_event *kfse) lock_fs_event_buf(); - // mark this fsevent as invalid - kfse->type = FSE_INVALID; + if (kfse->refcount > 0) { + panic("do_free_event: free'ing a kfsevent w/refcount == %d (kfse %p)\n", + kfse->refcount, kfse); + } // make a copy of this so we can free things without // holding the fs_event_buf lock @@ -490,6 +492,9 @@ do_free_event(kfs_event *kfse) // and just to be anal, set this so that there are no args kfse->args[0].type = FSE_ARG_DONE; + // mark this fsevent as invalid + kfse->type = FSE_INVALID; + free_event_idx = (kfse - fs_event_buf); unlock_fs_event_buf(); @@ -542,6 +547,7 @@ add_watcher(int8_t *event_list, int32_t num_events, int32_t eventq_size, fs_even watcher->rd = 0; watcher->wr = 0; watcher->blockers = 0; + watcher->num_readers = 0; lock_watch_list(); @@ -650,9 +656,15 @@ fmod_watch(fs_event_watcher *watcher, struct uio *uio) return EINVAL; } + if (OSAddAtomic(1, (SInt32 *)&watcher->num_readers) != 0) { + // don't allow multiple 
threads to read from the fd at the same time + OSAddAtomic(-1, (SInt32 *)&watcher->num_readers); + return EAGAIN; + } if (watcher->rd == watcher->wr) { if (watcher->flags & WATCHER_CLOSING) { + OSAddAtomic(-1, (SInt32 *)&watcher->num_readers); return 0; } OSAddAtomic(1, (SInt32 *)&watcher->blockers); @@ -663,6 +675,7 @@ fmod_watch(fs_event_watcher *watcher, struct uio *uio) OSAddAtomic(-1, (SInt32 *)&watcher->blockers); if (error != 0 || (watcher->flags & WATCHER_CLOSING)) { + OSAddAtomic(-1, (SInt32 *)&watcher->num_readers); return error; } } @@ -681,6 +694,7 @@ fmod_watch(fs_event_watcher *watcher, struct uio *uio) } if (error) { + OSAddAtomic(-1, (SInt32 *)&watcher->num_readers); return error; } @@ -850,6 +864,7 @@ fmod_watch(fs_event_watcher *watcher, struct uio *uio) } get_out: + OSAddAtomic(-1, (SInt32 *)&watcher->num_readers); return error; } @@ -902,6 +917,9 @@ fsevent_unmount(struct mount *mp) if (vname) vnode_putname(vname); + + strcpy(pathbuff, "UNKNOWN-FILE"); + pathbuff_len = strlen(pathbuff) + 1; } // switch the type of the string diff --git a/bsd/vfs/vfs_init.c b/bsd/vfs/vfs_init.c index 4c4aabb22..d845bb150 100644 --- a/bsd/vfs/vfs_init.c +++ b/bsd/vfs/vfs_init.c @@ -282,14 +282,12 @@ vfsinit() struct mount * mp; /* Allocate vnode list lock group attribute and group */ - vnode_list_lck_grp_attr= lck_grp_attr_alloc_init(); - lck_grp_attr_setstat(vnode_list_lck_grp_attr); + vnode_list_lck_grp_attr = lck_grp_attr_alloc_init(); vnode_list_lck_grp = lck_grp_alloc_init("vnode list", vnode_list_lck_grp_attr); /* Allocate vnode list lock attribute */ vnode_list_lck_attr = lck_attr_alloc_init(); - //lck_attr_setdebug(vnode_list_lck_attr); /* Allocate vnode list lock */ vnode_list_mtx_lock = lck_mtx_alloc_init(vnode_list_lck_grp, vnode_list_lck_attr); @@ -299,36 +297,29 @@ vfsinit() /* allocate vnode lock group attribute and group */ vnode_lck_grp_attr= lck_grp_attr_alloc_init(); - lck_grp_attr_setstat(vnode_lck_grp_attr); vnode_lck_grp = 
lck_grp_alloc_init("vnode", vnode_lck_grp_attr); /* Allocate vnode lock attribute */ vnode_lck_attr = lck_attr_alloc_init(); - //lck_attr_setdebug(vnode_lck_attr); /* Allocate fs config lock group attribute and group */ fsconf_lck_grp_attr= lck_grp_attr_alloc_init(); - lck_grp_attr_setstat(fsconf_lck_grp_attr); fsconf_lck_grp = lck_grp_alloc_init("fs conf", fsconf_lck_grp_attr); /* Allocate fs config lock attribute */ fsconf_lck_attr = lck_attr_alloc_init(); - //lck_attr_setdebug(fsconf_lck_attr); - /* Allocate mount point related lock structures */ /* Allocate mount list lock group attribute and group */ mnt_list_lck_grp_attr= lck_grp_attr_alloc_init(); - lck_grp_attr_setstat(mnt_list_lck_grp_attr); mnt_list_lck_grp = lck_grp_alloc_init("mount list", mnt_list_lck_grp_attr); /* Allocate mount list lock attribute */ mnt_list_lck_attr = lck_attr_alloc_init(); - //lck_attr_setdebug(mnt_list_lck_attr); /* Allocate mount list lock */ mnt_list_mtx_lock = lck_mtx_alloc_init(mnt_list_lck_grp, mnt_list_lck_attr); @@ -336,13 +327,11 @@ vfsinit() /* allocate mount lock group attribute and group */ mnt_lck_grp_attr= lck_grp_attr_alloc_init(); - lck_grp_attr_setstat(mnt_lck_grp_attr); mnt_lck_grp = lck_grp_alloc_init("mount", mnt_lck_grp_attr); /* Allocate mount lock attribute */ mnt_lck_attr = lck_attr_alloc_init(); - //lck_attr_setdebug(mnt_lck_attr); /* * Initialize the "console user" for access purposes: diff --git a/bsd/vfs/vfs_journal.c b/bsd/vfs/vfs_journal.c index c1308807f..dba9c0b3c 100644 --- a/bsd/vfs/vfs_journal.c +++ b/bsd/vfs/vfs_journal.c @@ -197,9 +197,6 @@ journal_init() jnl_lock_attr = lck_attr_alloc_init(); jnl_group_attr = lck_grp_attr_alloc_init(); jnl_mutex_group = lck_grp_alloc_init("jnl-mutex", jnl_group_attr); - - /* Turn on lock debugging */ - //lck_attr_setdebug(jnl_lock_attr); } static __inline__ void diff --git a/bsd/vfs/vfs_lookup.c b/bsd/vfs/vfs_lookup.c index d10ba8fd3..f0aadcc4e 100644 --- a/bsd/vfs/vfs_lookup.c +++ b/bsd/vfs/vfs_lookup.c @@ 
-932,7 +932,7 @@ kdebug_lookup(dp, cnp) struct vnode *dp; struct componentname *cnp; { - register unsigned int i, n; + register unsigned int i, n, code; register int dbg_namelen; register int save_dbg_namelen; register char *dbg_nameptr; @@ -976,19 +976,27 @@ kdebug_lookup(dp, cnp) else dbg_parms[i++] = 0; } + dbg_namelen = save_dbg_namelen - 12; /* - In the event that we collect multiple, consecutive pathname - entries, we must mark the start of the path's string. - */ - KERNEL_DEBUG_CONSTANT((FSDBG_CODE(DBG_FSRW,36)) | DBG_FUNC_START, - (unsigned int)dp, dbg_parms[0], dbg_parms[1], dbg_parms[2], 0); - - for (dbg_namelen = save_dbg_namelen-12, i=3; - dbg_namelen > 0; - dbg_namelen -=(4 * sizeof(long)), i+= 4) - { - KERNEL_DEBUG_CONSTANT((FSDBG_CODE(DBG_FSRW,36)) | DBG_FUNC_NONE, - dbg_parms[i], dbg_parms[i+1], dbg_parms[i+2], dbg_parms[i+3], 0); - } + * In the event that we collect multiple, consecutive pathname + * entries, we must mark the start of the path's string and the end + */ + code = (FSDBG_CODE(DBG_FSRW,36)) | DBG_FUNC_START; + + if (dbg_namelen <= 0) + code |= DBG_FUNC_END; + + KERNEL_DEBUG_CONSTANT(code, (unsigned int)dp, dbg_parms[0], dbg_parms[1], dbg_parms[2], 0); + + code &= ~DBG_FUNC_START; + + for (i = 3; dbg_namelen > 0; i += 4) { + + dbg_namelen -= (4 * sizeof(long)); + if (dbg_namelen <= 0) + code |= DBG_FUNC_END; + + KERNEL_DEBUG_CONSTANT(code, dbg_parms[i], dbg_parms[i+1], dbg_parms[i+2], dbg_parms[i+3], 0); + } } diff --git a/bsd/vfs/vfs_quota.c b/bsd/vfs/vfs_quota.c index 118b7492d..93c5d052b 100644 --- a/bsd/vfs/vfs_quota.c +++ b/bsd/vfs/vfs_quota.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2002-2004 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2002-2005 Apple Computer, Inc. All rights reserved. 
* * @APPLE_LICENSE_HEADER_START@ * @@ -69,6 +69,8 @@ #include #include +#include + /* vars for quota file lock */ lck_grp_t * qf_lck_grp; @@ -82,7 +84,7 @@ lck_attr_t * quota_list_lck_attr; lck_mtx_t * quota_list_mtx_lock; /* Routines to lock and unlock the quota global data */ -static void dq_list_lock(void); +static int dq_list_lock(void); static void dq_list_unlock(void); static void dq_lock_internal(struct dquot *dq); @@ -136,14 +138,12 @@ dqinit() * Allocate quota list lock group attribute and group */ quota_list_lck_grp_attr= lck_grp_attr_alloc_init(); - lck_grp_attr_setstat(quota_list_lck_grp_attr); quota_list_lck_grp = lck_grp_alloc_init("quota list", quota_list_lck_grp_attr); /* * Allocate qouta list lock attribute */ quota_list_lck_attr = lck_attr_alloc_init(); - //lck_attr_setdebug(quota_list_lck_attr); /* * Allocate quota list lock @@ -155,22 +155,32 @@ dqinit() * allocate quota file lock group attribute and group */ qf_lck_grp_attr= lck_grp_attr_alloc_init(); - lck_grp_attr_setstat(qf_lck_grp_attr); qf_lck_grp = lck_grp_alloc_init("quota file", qf_lck_grp_attr); /* * Allocate quota file lock attribute */ qf_lck_attr = lck_attr_alloc_init(); - //lck_attr_setdebug(qf_lck_attr); } +static volatile int dq_list_lock_cnt = 0; -void +static int dq_list_lock(void) { lck_mtx_lock(quota_list_mtx_lock); + return ++dq_list_lock_cnt; +} + +static int +dq_list_lock_changed(int oldval) { + return (dq_list_lock_cnt != oldval); +} + +static int +dq_list_lock_val(void) { + return dq_list_lock_cnt; } void @@ -402,27 +412,27 @@ dqfileopen(qfp, type) goto out; } /* Sanity check the quota file header. 
*/ - if ((header.dqh_magic != quotamagic[type]) || - (header.dqh_version > QF_VERSION) || - (!powerof2(header.dqh_maxentries)) || - (header.dqh_maxentries > (file_size / sizeof(struct dqblk)))) { + if ((OSSwapBigToHostInt32(header.dqh_magic) != quotamagic[type]) || + (OSSwapBigToHostInt32(header.dqh_version) > QF_VERSION) || + (!powerof2(OSSwapBigToHostInt32(header.dqh_maxentries))) || + (OSSwapBigToHostInt32(header.dqh_maxentries) > (file_size / sizeof(struct dqblk)))) { error = EINVAL; goto out; } /* Set up the time limits for this quota. */ - if (header.dqh_btime > 0) - qfp->qf_btime = header.dqh_btime; + if (header.dqh_btime != 0) + qfp->qf_btime = OSSwapBigToHostInt32(header.dqh_btime); else qfp->qf_btime = MAX_DQ_TIME; - if (header.dqh_itime > 0) - qfp->qf_itime = header.dqh_itime; + if (header.dqh_itime != 0) + qfp->qf_itime = OSSwapBigToHostInt32(header.dqh_itime); else qfp->qf_itime = MAX_IQ_TIME; /* Calculate the hash table constants. */ - qfp->qf_maxentries = header.dqh_maxentries; - qfp->qf_entrycnt = header.dqh_entrycnt; - qfp->qf_shift = dqhashshift(header.dqh_maxentries); + qfp->qf_maxentries = OSSwapBigToHostInt32(header.dqh_maxentries); + qfp->qf_entrycnt = OSSwapBigToHostInt32(header.dqh_entrycnt); + qfp->qf_shift = dqhashshift(qfp->qf_maxentries); out: return (error); } @@ -446,7 +456,7 @@ dqfileclose(struct quotafile *qfp, __unused int type) context.vc_ucred = qfp->qf_cred; if (VNOP_READ(qfp->qf_vp, auio, 0, &context) == 0) { - header.dqh_entrycnt = qfp->qf_entrycnt; + header.dqh_entrycnt = OSSwapHostToBigInt32(qfp->qf_entrycnt); uio_reset(auio, 0, UIO_SYSSPACE, UIO_WRITE); uio_addiov(auio, CAST_USER_ADDR_T(&header), sizeof (header)); (void) VNOP_WRITE(qfp->qf_vp, auio, 0, &context); @@ -471,6 +481,7 @@ dqget(id, qfp, type, dqp) struct dqhash *dqh; struct vnode *dqvp; int error = 0; + int listlockval = 0; if ( id == 0 || qfp->qf_vp == NULLVP ) { *dqp = NODQUOT; @@ -494,6 +505,8 @@ dqget(id, qfp, type, dqp) dqh = DQHASH(dqvp, id); relookup: + 
listlockval = dq_list_lock_val(); + /* * Check the cache first. */ @@ -503,6 +516,11 @@ dqget(id, qfp, type, dqp) continue; dq_lock_internal(dq); + if (dq_list_lock_changed(listlockval)) { + dq_unlock_internal(dq); + goto relookup; + } + /* * dq_lock_internal may drop the quota_list_lock to msleep, so * we need to re-evaluate the identity of this dq @@ -521,6 +539,13 @@ dqget(id, qfp, type, dqp) TAILQ_REMOVE(&dqdirtylist, dq, dq_freelist); else TAILQ_REMOVE(&dqfreelist, dq, dq_freelist); + } else if (dq->dq_cnt == 0) { + /* We've overflowed */ + --dq->dq_cnt; + dq_unlock_internal(dq); + dq_list_unlock(); + *dqp = NODQUOT; + return (EINVAL); } dq_unlock_internal(dq); @@ -573,7 +598,7 @@ dqget(id, qfp, type, dqp) ndq = (struct dquot *)_MALLOC(sizeof *dq, M_DQUOT, M_WAITOK); bzero((char *)ndq, sizeof *dq); - dq_list_lock(); + listlockval = dq_list_lock(); /* * need to look for the entry again in the cache * since we dropped the quota list lock and @@ -611,7 +636,7 @@ dqget(id, qfp, type, dqp) dq_lock_internal(dq); - if (dq->dq_cnt || (dq->dq_flags & DQ_MOD)) { + if (dq_list_lock_changed(listlockval) || dq->dq_cnt || (dq->dq_flags & DQ_MOD)) { /* * we lost the race while we weren't holding * the quota list lock... dq_lock_internal @@ -654,6 +679,10 @@ dqget(id, qfp, type, dqp) * one else can be trying to use this dq */ dq_lock_internal(dq); + if (dq_list_lock_changed(listlockval)) { + dq_unlock_internal(dq); + goto relookup; + } /* * Initialize the contents of the dquot structure. @@ -784,7 +813,7 @@ dqlookup(qfp, id, dqb, index) */ if (dqb->dqb_id == 0) { bzero(dqb, sizeof(struct dqblk)); - dqb->dqb_id = id; + dqb->dqb_id = OSSwapHostToBigInt32(id); /* * Write back to reserve entry for this id */ @@ -795,11 +824,22 @@ dqlookup(qfp, id, dqb, index) error = EIO; if (error == 0) ++qfp->qf_entrycnt; + dqb->dqb_id = id; break; } /* An id match means an entry was found. 
*/ - if (dqb->dqb_id == id) + if (OSSwapBigToHostInt32(dqb->dqb_id) == id) { + dqb->dqb_bhardlimit = OSSwapBigToHostInt64(dqb->dqb_bhardlimit); + dqb->dqb_bsoftlimit = OSSwapBigToHostInt64(dqb->dqb_bsoftlimit); + dqb->dqb_curbytes = OSSwapBigToHostInt64(dqb->dqb_curbytes); + dqb->dqb_ihardlimit = OSSwapBigToHostInt32(dqb->dqb_ihardlimit); + dqb->dqb_isoftlimit = OSSwapBigToHostInt32(dqb->dqb_isoftlimit); + dqb->dqb_curinodes = OSSwapBigToHostInt32(dqb->dqb_curinodes); + dqb->dqb_btime = OSSwapBigToHostInt32(dqb->dqb_btime); + dqb->dqb_itime = OSSwapBigToHostInt32(dqb->dqb_itime); + dqb->dqb_id = OSSwapBigToHostInt32(dqb->dqb_id); break; + } } qf_unlock(qfp); @@ -871,14 +911,21 @@ dqsync_orphans(qfp) struct quotafile *qfp; { struct dquot *dq; - + int listlockval = 0; + dq_list_lock(); loop: + listlockval = dq_list_lock_val(); + TAILQ_FOREACH(dq, &dqdirtylist, dq_freelist) { if (dq->dq_qfile != qfp) continue; dq_lock_internal(dq); + if (dq_list_lock_changed(listlockval)) { + dq_unlock_internal(dq); + goto loop; + } if (dq->dq_qfile != qfp) { /* @@ -948,6 +995,7 @@ dqsync_locked(struct dquot *dq) struct proc *p = current_proc(); /* XXX */ struct vfs_context context; struct vnode *dqvp; + struct dqblk dqb, *dqblkp; uio_t auio; int error; char uio_buf[ UIO_SIZEOF(1) ]; @@ -963,11 +1011,26 @@ dqsync_locked(struct dquot *dq) auio = uio_createwithbuffer(1, dqoffset(dq->dq_index), UIO_SYSSPACE, UIO_WRITE, &uio_buf[0], sizeof(uio_buf)); - uio_addiov(auio, CAST_USER_ADDR_T(&dq->dq_dqb), sizeof (struct dqblk)); + uio_addiov(auio, CAST_USER_ADDR_T(&dqb), sizeof (struct dqblk)); context.vc_proc = p; context.vc_ucred = dq->dq_qfile->qf_cred; + dqblkp = &dq->dq_dqb; + dqb.dqb_bhardlimit = OSSwapHostToBigInt64(dqblkp->dqb_bhardlimit); + dqb.dqb_bsoftlimit = OSSwapHostToBigInt64(dqblkp->dqb_bsoftlimit); + dqb.dqb_curbytes = OSSwapHostToBigInt64(dqblkp->dqb_curbytes); + dqb.dqb_ihardlimit = OSSwapHostToBigInt32(dqblkp->dqb_ihardlimit); + dqb.dqb_isoftlimit = 
OSSwapHostToBigInt32(dqblkp->dqb_isoftlimit); + dqb.dqb_curinodes = OSSwapHostToBigInt32(dqblkp->dqb_curinodes); + dqb.dqb_btime = OSSwapHostToBigInt32(dqblkp->dqb_btime); + dqb.dqb_itime = OSSwapHostToBigInt32(dqblkp->dqb_itime); + dqb.dqb_id = OSSwapHostToBigInt32(dqblkp->dqb_id); + dqb.dqb_spare[0] = 0; + dqb.dqb_spare[1] = 0; + dqb.dqb_spare[2] = 0; + dqb.dqb_spare[3] = 0; + error = VNOP_WRITE(dqvp, auio, 0, &context); if (uio_resid(auio) && error == 0) error = EIO; diff --git a/bsd/vfs/vfs_subr.c b/bsd/vfs/vfs_subr.c index fd2250cc7..1ddfa971a 100644 --- a/bsd/vfs/vfs_subr.c +++ b/bsd/vfs/vfs_subr.c @@ -3990,6 +3990,47 @@ vnode_setsize(vnode_t vp, off_t size, int ioflag, vfs_context_t ctx) return(vnode_setattr(vp, &va, ctx)); } +/* + * Create a filesystem object of arbitrary type with arbitrary attributes in + * the spevied directory with the specified name. + * + * Parameters: dvp Pointer to the vnode of the directory + * in which to create the object. + * vpp Pointer to the area into which to + * return the vnode of the created object. + * cnp Component name pointer from the namei + * data structure, containing the name to + * use for the create object. + * vap Pointer to the vnode_attr structure + * describing the object to be created, + * including the type of object. + * flags VN_* flags controlling ACL inheritance + * and whether or not authorization is to + * be required for the operation. + * + * Returns: 0 Success + * !0 errno value + * + * Implicit: *vpp Contains the vnode of the object that + * was created, if successful. + * *cnp May be modified by the underlying VFS. + * *vap May be modified by the underlying VFS. + * modified by either ACL inheritance or + * + * + * be modified, even if the operation is + * + * + * Notes: The kauth_filesec_t in 'vap', if any, is in host byte order. + * + * Modification of '*cnp' and '*vap' by the underlying VFS is + * strongly discouraged. 
+ * + * XXX: This function is a 'vn_*' function; it belongs in vfs_vnops.c + * + * XXX: We should enummerate the possible errno values here, and where + * in the code they originated. + */ errno_t vn_create(vnode_t dvp, vnode_t *vpp, struct componentname *cnp, struct vnode_attr *vap, int flags, vfs_context_t ctx) { diff --git a/bsd/vfs/vfs_support.c b/bsd/vfs/vfs_support.c index 0bf329efe..79e071e46 100644 --- a/bsd/vfs/vfs_support.c +++ b/bsd/vfs/vfs_support.c @@ -813,27 +813,6 @@ err_pageout(struct vnop_pageout_args *ap) } -struct vnop_devblocksize_args /* { - struct vnode *a_vp; - register_t *a_retval; -} */; - -int -nop_devblocksize(struct vnop_devblocksize_args *ap) -{ - /* XXX default value because the call sites do not check error */ - *ap->a_retval = 512; - return (0); -} - -int -err_devblocksize(struct vnop_devblocksize_args *ap) -{ - (void)nop_devblocksize(ap); - return (ENOTSUP); -} - - struct vnop_searchfs /* { struct vnode *a_vp; void *a_searchparams1; diff --git a/bsd/vfs/vfs_support.h b/bsd/vfs/vfs_support.h index 9e49a68a1..21c0b21e1 100644 --- a/bsd/vfs/vfs_support.h +++ b/bsd/vfs/vfs_support.h @@ -40,7 +40,6 @@ #include #include #include -#include #include #include #include @@ -51,7 +50,6 @@ #include #include #include -#include __BEGIN_DECLS extern int nop_create(struct vnop_create_args *ap); @@ -164,9 +162,6 @@ extern int err_pagein(struct vnop_pagein_args *ap); extern int nop_pageout(struct vnop_pageout_args *ap); extern int err_pageout(struct vnop_pageout_args *ap); -extern int nop_devblocksize(struct vnop_devblocksize_args *ap); -extern int err_devblocksize(struct vnop_devblocksize_args *ap); - extern int nop_searchfs(struct vnop_searchfs_args *ap); extern int err_searchfs(struct vnop_searchfs_args *ap); diff --git a/bsd/vfs/vfs_syscalls.c b/bsd/vfs/vfs_syscalls.c index e07690f8d..ae61ed7ad 100644 --- a/bsd/vfs/vfs_syscalls.c +++ b/bsd/vfs/vfs_syscalls.c @@ -96,7 +96,6 @@ #include -#include #include @@ -1522,6 +1521,30 @@ open1(vfs_context_t 
ctx, user_addr_t upath, int uflags, struct vnode_attr *vap, } +/* + * An open system call using an extended argument list compared to the regular + * system call 'open'. + * + * Parameters: p Process requesting the open + * uap User argument descriptor (see below) + * retval Pointer to an area to receive the + * return calue from the system call + * + * Indirect: uap->path Path to open (same as 'open') + * uap->flags Flags to open (same as 'open' + * uap->uid UID to set, if creating + * uap->gid GID to set, if creating + * uap->mode File mode, if creating (same as 'open') + * uap->xsecurity ACL to set, if creating + * + * Returns: 0 Success + * !0 errno value + * + * Notes: The kauth_filesec_t in 'va', if any, is in host byte order. + * + * XXX: We should enummerate the possible errno values here, and where + * in the code they originated. + */ int open_extended(struct proc *p, struct open_extended_args *uap, register_t *retval) { @@ -1723,6 +1746,29 @@ mkfifo1(vfs_context_t ctx, user_addr_t upath, struct vnode_attr *vap) return error; } + +/* + * A mkfifo system call using an extended argument list compared to the regular + * system call 'mkfifo'. + * + * Parameters: p Process requesting the open + * uap User argument descriptor (see below) + * retval (Ignored) + * + * Indirect: uap->path Path to fifo (same as 'mkfifo') + * uap->uid UID to set + * uap->gid GID to set + * uap->mode File mode to set (same as 'mkfifo') + * uap->xsecurity ACL to set, if creating + * + * Returns: 0 Success + * !0 errno value + * + * Notes: The kauth_filesec_t in 'va', if any, is in host byte order. + * + * XXX: We should enummerate the possible errno values here, and where + * in the code they originated. 
+ */ int mkfifo_extended(struct proc *p, struct mkfifo_extended_args *uap, __unused register_t *retval) { @@ -2815,6 +2861,28 @@ chmod1(vfs_context_t ctx, user_addr_t path, struct vnode_attr *vap) return(error); } +/* + * A chmod system call using an extended argument list compared to the regular + * system call 'mkfifo'. + * + * Parameters: p Process requesting the open + * uap User argument descriptor (see below) + * retval (ignored) + * + * Indirect: uap->path Path to object (same as 'chmod') + * uap->uid UID to set + * uap->gid GID to set + * uap->mode File mode to set (same as 'chmod') + * uap->xsecurity ACL to set (or delete) + * + * Returns: 0 Success + * !0 errno value + * + * Notes: The kauth_filesec_t in 'va', if any, is in host byte order. + * + * XXX: We should enummerate the possible errno values here, and where + * in the code they originated. + */ int chmod_extended(struct proc *p, struct chmod_extended_args *uap, __unused register_t *retval) { @@ -4701,7 +4769,8 @@ searchfs (struct proc *p, register struct searchfs_args *uap, __unused register_ searchblock.returnbuffer = CAST_USER_ADDR_T(tmp_searchblock.returnbuffer); searchblock.returnbuffersize = tmp_searchblock.returnbuffersize; searchblock.maxmatches = tmp_searchblock.maxmatches; - searchblock.timelimit = tmp_searchblock.timelimit; + searchblock.timelimit.tv_sec = tmp_searchblock.timelimit.tv_sec; + searchblock.timelimit.tv_usec = tmp_searchblock.timelimit.tv_usec; searchblock.searchparams1 = CAST_USER_ADDR_T(tmp_searchblock.searchparams1); searchblock.sizeofsearchparams1 = tmp_searchblock.sizeofsearchparams1; searchblock.searchparams2 = CAST_USER_ADDR_T(tmp_searchblock.searchparams2); @@ -5433,6 +5502,8 @@ munge_statfs(struct mount *mp, struct vfsstatfs *sfsp, */ void munge_stat(struct stat *sbp, struct user_stat *usbp) { + bzero(usbp, sizeof(struct user_stat)); + usbp->st_dev = sbp->st_dev; usbp->st_ino = sbp->st_ino; usbp->st_mode = sbp->st_mode; diff --git a/bsd/vfs/vfs_utfconv.c 
b/bsd/vfs/vfs_utfconv.c index 11af9238a..97b08226f 100644 --- a/bsd/vfs/vfs_utfconv.c +++ b/bsd/vfs/vfs_utfconv.c @@ -27,7 +27,7 @@ #include #include #include -#include +#include /* * UTF-8 (Unicode Transformation Format) @@ -175,7 +175,7 @@ utf8_encodelen(const u_int16_t * ucsp, size_t ucslen, u_int16_t altslash, ucs_ch = *ucsp++; if (swapbytes) - ucs_ch = NXSwapShort(ucs_ch); + ucs_ch = OSSwapInt16(ucs_ch); if (ucs_ch == '/') ucs_ch = altslash ? altslash : '_'; else if (ucs_ch == '\0') @@ -232,7 +232,7 @@ utf8_encodestr(const u_int16_t * ucsp, size_t ucslen, u_int8_t * utf8p, --extra; ucs_ch = *chp++; } else { - ucs_ch = swapbytes ? NXSwapShort(*ucsp++) : *ucsp++; + ucs_ch = swapbytes ? OSSwapInt16(*ucsp++) : *ucsp++; if (decompose && unicode_decomposeable(ucs_ch)) { extra = unicode_decompose(ucs_ch, sequence) - 1; @@ -276,7 +276,7 @@ utf8_encodestr(const u_int16_t * ucsp, size_t ucslen, u_int8_t * utf8p, u_int16_t ch2; u_int32_t pair; - ch2 = swapbytes ? NXSwapShort(*ucsp) : *ucsp; + ch2 = swapbytes ? OSSwapInt16(*ucsp) : *ucsp; if (ch2 >= SP_LOW_FIRST && ch2 <= SP_LOW_LAST) { pair = ((ucs_ch - SP_HIGH_FIRST) << SP_HALF_SHIFT) + (ch2 - SP_LOW_FIRST) + SP_HALF_BASE; @@ -414,13 +414,13 @@ utf8_decodestr(const u_int8_t* utf8p, size_t utf8len, u_int16_t* ucsp, ucs_ch = (ch >> SP_HALF_SHIFT) + SP_HIGH_FIRST; if (ucs_ch < SP_HIGH_FIRST || ucs_ch > SP_HIGH_LAST) goto invalid; - *ucsp++ = swapbytes ? NXSwapShort(ucs_ch) : ucs_ch; + *ucsp++ = swapbytes ? OSSwapInt16(ucs_ch) : ucs_ch; if (ucsp >= bufend) goto toolong; ucs_ch = (ch & SP_HALF_MASK) + SP_LOW_FIRST; if (ucs_ch < SP_LOW_FIRST || ucs_ch > SP_LOW_LAST) goto invalid; - *ucsp++ = swapbytes ? NXSwapShort(ucs_ch) : ucs_ch; + *ucsp++ = swapbytes ? OSSwapInt16(ucs_ch) : ucs_ch; continue; default: goto invalid; @@ -434,7 +434,7 @@ utf8_decodestr(const u_int8_t* utf8p, size_t utf8len, u_int16_t* ucsp, for (i = 0; i < count; ++i) { ucs_ch = sequence[i]; - *ucsp++ = swapbytes ? 
NXSwapShort(ucs_ch) : ucs_ch; + *ucsp++ = swapbytes ? OSSwapInt16(ucs_ch) : ucs_ch; if (ucsp >= bufend) goto toolong; } @@ -445,7 +445,7 @@ utf8_decodestr(const u_int8_t* utf8p, size_t utf8len, u_int16_t* ucsp, u_int16_t composite, base; if (unicode_combinable(ucs_ch)) { - base = swapbytes ? NXSwapShort(*(ucsp - 1)) : *(ucsp - 1); + base = swapbytes ? OSSwapInt16(*(ucsp - 1)) : *(ucsp - 1); composite = unicode_combine(base, ucs_ch); if (composite) { --ucsp; @@ -470,7 +470,7 @@ utf8_decodestr(const u_int8_t* utf8p, size_t utf8len, u_int16_t* ucsp, } combcharcnt = 0; /* start over */ } - *ucsp++ = swapbytes ? NXSwapShort(ucs_ch) : ucs_ch; + *ucsp++ = swapbytes ? OSSwapInt16(ucs_ch) : ucs_ch; } /* * Make a previous combining sequence canonical diff --git a/bsd/vfs/vfs_vnops.c b/bsd/vfs/vfs_vnops.c index 402d876ea..c68e439b9 100644 --- a/bsd/vfs/vfs_vnops.c +++ b/bsd/vfs/vfs_vnops.c @@ -82,6 +82,7 @@ #include #include #include +#include #include #include @@ -128,6 +129,51 @@ vn_open_modflags(struct nameidata *ndp, int *fmodep, int cmode) return(vn_open_auth(ndp, fmodep, &va)); } +/* + * Open a file with authorization, updating the contents of the structures + * pointed to by ndp, fmodep, and vap as necessary to perform the requested + * operation. This function is used for both opens of existing files, and + * creation of new files. + * + * Parameters: ndp The nami data pointer describing the + * file + * fmodep A pointer to an int containg the mode + * information to be used for the open + * vap A pointer to the vnode attribute + * descriptor to be used for the open + * + * Indirect: * Contents of the data structures pointed + * to by the parameters are modified as + * necessary to the requested operation. + * + * Returns: 0 Success + * !0 errno value + * + * Notes: The kauth_filesec_t in 'vap', if any, is in host byte order. 
+ * + * The contents of '*ndp' will be modified, based on the other + * arguments to this function, and to return file and directory + * data necessary to satisfy the requested operation. + * + * If the file does not exist and we are creating it, then the + * O_TRUNC flag will be cleared in '*fmodep' to indicate to the + * caller that the file was not truncated. + * + * If the file exists and the O_EXCL flag was not specified, then + * the O_CREAT flag will be cleared in '*fmodep' to indicate to + * the caller that the existing file was merely opened rather + * than created. + * + * The contents of '*vap' will be modified as necessary to + * complete the operation, including setting of supported + * attribute, clearing of fields containing unsupported attributes + * in the request, if the request proceeds without them, etc.. + * + * XXX: This function is too complicated in actings on its arguments + * + * XXX: We should enummerate the possible errno values here, and where + * in the code they originated. 
+ */ int vn_open_auth(struct nameidata *ndp, int *fmodep, struct vnode_attr *vap) { @@ -143,6 +189,10 @@ vn_open_auth(struct nameidata *ndp, int *fmodep, struct vnode_attr *vap) dvp = NULL; fmode = *fmodep; if (fmode & O_CREAT) { + if ( (fmode & O_DIRECTORY) ) { + error = EINVAL; + goto out; + } ndp->ni_cnd.cn_nameiop = CREATE; ndp->ni_cnd.cn_flags = LOCKPARENT | LOCKLEAF | AUDITVNPATH1; @@ -231,6 +281,11 @@ vn_open_auth(struct nameidata *ndp, int *fmodep, struct vnode_attr *vap) vp = ndp->ni_vp; nameidone(ndp); ndp->ni_dvp = NULL; + + if ( (fmode & O_DIRECTORY) && vp->v_type != VDIR ) { + error = ENOTDIR; + goto bad; + } } if (vp->v_type == VSOCK && vp->v_tag != VT_FDESC) { error = EOPNOTSUPP; /* Operation not supported on socket */ diff --git a/bsd/vfs/vfs_xattr.c b/bsd/vfs/vfs_xattr.c index 9653ba143..a6063bdb7 100644 --- a/bsd/vfs/vfs_xattr.c +++ b/bsd/vfs/vfs_xattr.c @@ -37,7 +37,7 @@ #include -#include +#include #include /* @@ -473,9 +473,9 @@ typedef struct attr_info { ((u_int8_t *)ATTR_NEXT(ae) <= ((ai).rawdata + (ai).rawsize)) -#define SWAP16(x) NXSwapBigShortToHost((x)) -#define SWAP32(x) NXSwapBigIntToHost((x)) -#define SWAP64(x) NXSwapBigLongLongToHost((x)) +#define SWAP16(x) OSSwapBigToHostInt16((x)) +#define SWAP32(x) OSSwapBigToHostInt32((x)) +#define SWAP64(x) OSSwapBigToHostInt64((x)) static u_int32_t emptyfinfo[8] = {0}; @@ -1782,11 +1782,13 @@ write_xattrinfo(attr_info_t *ainfop) uio_addiov(auio, (uintptr_t)ainfop->filehdr, ainfop->iosize); swap_adhdr(ainfop->filehdr); + if (ainfop->attrhdr != NULL) swap_attrhdr(ainfop->attrhdr); error = VNOP_WRITE(ainfop->filevp, auio, 0, ainfop->context); swap_adhdr(ainfop->filehdr); + if (ainfop->attrhdr != NULL) swap_attrhdr(ainfop->attrhdr); return (error); } diff --git a/bsd/vfs/vnode_if.c b/bsd/vfs/vnode_if.c index b77c7c3d4..066760dda 100644 --- a/bsd/vfs/vnode_if.c +++ b/bsd/vfs/vnode_if.c @@ -736,22 +736,6 @@ struct vnodeop_desc vnop_pageout_desc = { NULL, }; -int vnop_devblocksize_vp_offsets[] = { - 
VOPARG_OFFSETOF(struct vnop_devblocksize_args,a_vp), - VDESC_NO_OFFSET -}; -struct vnodeop_desc vnop_devblocksize_desc = { - 0, - "vnop_devblocksize", - 0, - vnop_devblocksize_vp_offsets, - VDESC_NO_OFFSET, - VDESC_NO_OFFSET, - VDESC_NO_OFFSET, - VDESC_NO_OFFSET, - NULL, -}; - int vnop_searchfs_vp_offsets[] = { VOPARG_OFFSETOF(struct vnop_searchfs_args,a_vp), VDESC_NO_OFFSET @@ -979,7 +963,6 @@ struct vnodeop_desc *vfs_op_descs[] = { &vnop_allocate_desc, &vnop_pagein_desc, &vnop_pageout_desc, - &vnop_devblocksize_desc, &vnop_searchfs_desc, &vnop_copyfile_desc, &vnop_getxattr_desc, diff --git a/bsd/vm/vm_unix.c b/bsd/vm/vm_unix.c index f20fc6e2c..5fedbadf9 100644 --- a/bsd/vm/vm_unix.c +++ b/bsd/vm/vm_unix.c @@ -58,9 +58,11 @@ #include #include #include +#include #include #include #include +#include #include #include @@ -76,6 +78,12 @@ #include +void +log_nx_failure(addr64_t vaddr, vm_prot_t prot) +{ + printf("NX failure: %s - vaddr=%qx, prot=%x\n", current_proc()->p_comm, vaddr, prot); +} + int useracc( @@ -343,6 +351,19 @@ pid_for_task( * * XXX This should be a BSD system call, not a Mach trap!!! */ +/* + * + * tfp_policy = KERN_TFP_POLICY_DENY; Deny Mode: None allowed except for self + * tfp_policy = KERN_TFP_POLICY_PERMISSIVE; Permissive Mode: all permissive; related ones allowed or privileged + * tfp_policy = KERN_TFP_POLICY_RESTRICTED; Restricted Mode: self access allowed; setgid (to tfp_group) are allowed for other tasks + * + */ +static int tfp_policy = KERN_TFP_POLICY_RESTRICTED; +/* the groutp is inited to kmem group and is modifiable by sysctl */ +static int tfp_group_inited = 0; /* policy groups are loaded ... 
*/ +static gid_t tfp_group_ronly = 0; /* procview group */ +static gid_t tfp_group_rw = 0; /* procmod group */ + kern_return_t task_for_pid( struct task_for_pid_args *args) @@ -357,7 +378,10 @@ task_for_pid( mach_port_name_t tret; void * sright; int error = 0; + int is_member = 0; boolean_t funnel_state; + boolean_t ispermitted = FALSE; + char procname[MAXCOMLEN+1]; AUDIT_MACH_SYSCALL_ENTER(AUE_TASKFORPID); AUDIT_ARG(pid, pid); @@ -372,7 +396,7 @@ task_for_pid( funnel_state = thread_funnel_set(kernel_flock, TRUE); - p1 = get_bsdtask_info(t1); /* XXX current proc */ + p1 = current_proc(); /* * Delayed binding of thread credential to process credential, if we @@ -393,24 +417,70 @@ task_for_pid( p = pfind(pid); AUDIT_ARG(process, p); - if ( - (p != (struct proc *) 0) - && (p1 != (struct proc *) 0) - && ( - (p1 == p) - || !(suser(kauth_cred_get(), 0)) - || ((kauth_cred_getuid(p->p_ucred) == kauth_cred_getuid(kauth_cred_get())) - && (p->p_ucred->cr_ruid == kauth_cred_get()->cr_ruid) - && ((p->p_flag & P_SUGID) == 0)) - ) - && (p->p_stat != SZOMB) - ) { - if (p->task != TASK_NULL) { - task_reference(p->task); - sright = (void *)convert_task_to_port(p->task); - tret = ipc_port_copyout_send( - sright, - get_task_ipcspace(current_task())); + switch (tfp_policy) { + + case KERN_TFP_POLICY_PERMISSIVE: + /* self or suser or related ones */ + if ((p != (struct proc *) 0) + && (p1 != (struct proc *) 0) + && ( + (p1 == p) + || !(suser(kauth_cred_get(), 0)) + || ((kauth_cred_getuid(p->p_ucred) == kauth_cred_getuid(kauth_cred_get())) && + ((p->p_ucred->cr_ruid == kauth_cred_get()->cr_ruid)) + && ((p->p_flag & P_SUGID) == 0)) + ) + && (p->p_stat != SZOMB) + ) + ispermitted = TRUE; + break; + + case KERN_TFP_POLICY_RESTRICTED: + /* self or suser or setgid and related ones only */ + if ((p != (struct proc *) 0) + && (p1 != (struct proc *) 0) + && ( + (p1 == p) + || !(suser(kauth_cred_get(), 0)) + || (((tfp_group_inited != 0) && + ( + ((kauth_cred_ismember_gid(kauth_cred_get(), + 
tfp_group_ronly, &is_member) == 0) && is_member) + ||((kauth_cred_ismember_gid(kauth_cred_get(), + tfp_group_rw, &is_member) == 0) && is_member) + ) + ) + && ((kauth_cred_getuid(p->p_ucred) == kauth_cred_getuid(kauth_cred_get())) && + ((p->p_ucred->cr_ruid == kauth_cred_get()->cr_ruid)) + && ((p->p_flag & P_SUGID) == 0)) + ) + ) + && (p->p_stat != SZOMB) + ) + ispermitted = TRUE; + + break; + + case KERN_TFP_POLICY_DENY: + /* self or suser only */ + default: + /* do not return task port of other task at all */ + if ((p1 != (struct proc *) 0) && (p != (struct proc *) 0) && (p->p_stat != SZOMB) + && ((p1 == p) || !(suser(kauth_cred_get(), 0)))) + ispermitted = TRUE; + else + ispermitted = FALSE; + break; + }; + + + if (ispermitted == TRUE) { + if (p->task != TASK_NULL) { + task_reference(p->task); + sright = (void *)convert_task_to_port(p->task); + tret = ipc_port_copyout_send( + sright, + get_task_ipcspace(current_task())); } else tret = MACH_PORT_NULL; AUDIT_ARG(mach_port2, tret); @@ -418,7 +488,20 @@ task_for_pid( task_deallocate(t1); error = KERN_SUCCESS; goto tfpout; + } else { + /* + * There is no guarantee that p_comm is null terminated and + * kernel implementation of string functions are complete. So + * ensure stale info is not leaked out, bzero the buffer + */ + bzero(&procname[0], MAXCOMLEN+1); + strncpy(&procname[0], &p1->p_comm[0], MAXCOMLEN); + if (tfp_policy != KERN_TFP_POLICY_PERMISSIVE) + log(LOG_NOTICE, "(%d: %s)tfp: failed on %d:\n", + ((p1 != PROC_NULL)?(p1->p_pid):0), &procname[0], + ((p != PROC_NULL)?(p->p_pid):0)); } + task_deallocate(t1); tret = MACH_PORT_NULL; (void) copyout((char *) &tret, task_addr, sizeof(mach_port_name_t)); @@ -429,6 +512,182 @@ task_for_pid( return(error); } +/* + * Routine: task_name_for_pid + * Purpose: + * Get the task name port for another "process", named by its + * process ID on the same host as "target_task". + * + * Only permitted to privileged processes, or processes + * with the same user ID. 
+ * + * XXX This should be a BSD system call, not a Mach trap!!! + */ + +kern_return_t +task_name_for_pid( + struct task_name_for_pid_args *args) +{ + mach_port_name_t target_tport = args->target_tport; + int pid = args->pid; + user_addr_t task_addr = args->t; + struct uthread *uthread; + struct proc *p; + struct proc *p1; + task_t t1; + mach_port_name_t tret; + void * sright; + int error = 0; + boolean_t funnel_state; + + AUDIT_MACH_SYSCALL_ENTER(AUE_TASKNAMEFORPID); + AUDIT_ARG(pid, pid); + AUDIT_ARG(mach_port1, target_tport); + + t1 = port_name_to_task(target_tport); + if (t1 == TASK_NULL) { + (void ) copyout((char *)&t1, task_addr, sizeof(mach_port_name_t)); + AUDIT_MACH_SYSCALL_EXIT(KERN_FAILURE); + return(KERN_FAILURE); + } + + funnel_state = thread_funnel_set(kernel_flock, TRUE); + + p1 = current_proc(); + + /* + * Delayed binding of thread credential to process credential, if we + * are not running with an explicitly set thread credential. + */ + uthread = get_bsdthread_info(current_thread()); + if (uthread->uu_ucred != p1->p_ucred && + (uthread->uu_flag & UT_SETUID) == 0) { + kauth_cred_t old = uthread->uu_ucred; + proc_lock(p1); + uthread->uu_ucred = p1->p_ucred; + kauth_cred_ref(uthread->uu_ucred); + proc_unlock(p1); + if (old != NOCRED) + kauth_cred_rele(old); + } + + p = pfind(pid); + AUDIT_ARG(process, p); + + if ((p != (struct proc *) 0) + && (p->p_stat != SZOMB) + && (p1 != (struct proc *) 0) + && ((p1 == p) + || !(suser(kauth_cred_get(), 0)) + || ((kauth_cred_getuid(p->p_ucred) == kauth_cred_getuid(kauth_cred_get())) && + ((p->p_ucred->cr_ruid == kauth_cred_get()->cr_ruid))))) + { + if (p->task != TASK_NULL) + { + task_reference(p->task); + sright = (void *)convert_task_name_to_port(p->task); + tret = ipc_port_copyout_send( + sright, + get_task_ipcspace(current_task())); + } else + tret = MACH_PORT_NULL; + AUDIT_ARG(mach_port2, tret); + (void ) copyout((char *)&tret, task_addr, sizeof(mach_port_name_t)); + task_deallocate(t1); + error = 
KERN_SUCCESS; + goto tnfpout; + } + + task_deallocate(t1); + tret = MACH_PORT_NULL; + (void) copyout((char *) &tret, task_addr, sizeof(mach_port_name_t)); + error = KERN_FAILURE; +tnfpout: + thread_funnel_set(kernel_flock, funnel_state); + AUDIT_MACH_SYSCALL_EXIT(error); + return(error); +} + +static int +sysctl_settfp_policy(__unused struct sysctl_oid *oidp, void *arg1, + __unused int arg2, struct sysctl_req *req) +{ + int error = 0; + int new_value; + + error = SYSCTL_OUT(req, arg1, sizeof(int)); + if (error || req->newptr == USER_ADDR_NULL) + return(error); + + if (!is_suser()) + return(EPERM); + + if ((error = SYSCTL_IN(req, &new_value, sizeof(int)))) { + goto out; + } + if ((new_value == KERN_TFP_POLICY_DENY) + || (new_value == KERN_TFP_POLICY_PERMISSIVE) + || (new_value == KERN_TFP_POLICY_RESTRICTED)) + tfp_policy = new_value; + else + error = EINVAL; +out: + return(error); + +} + +static int +sysctl_settfp_groups(__unused struct sysctl_oid *oidp, void *arg1, + __unused int arg2, struct sysctl_req *req) +{ + int error = 0; + int new_value; + + error = SYSCTL_OUT(req, arg1, sizeof(int)); + if (error || req->newptr == USER_ADDR_NULL) + return(error); + + if (!is_suser()) + return(EPERM); + + /* + * Once set; cannot be reset till next boot. Launchd will set this + * in its pid 1 init and no one can set after that. 
+ */ + if (tfp_group_inited != 0) + return(EPERM); + + if ((error = SYSCTL_IN(req, &new_value, sizeof(int)))) { + goto out; + } + + if (new_value >= 100) + error = EINVAL; + else { + if (arg1 == &tfp_group_ronly) + tfp_group_ronly = new_value; + else if (arg1 == &tfp_group_rw) + tfp_group_rw = new_value; + else + error = EINVAL; + if ((tfp_group_ronly != 0 ) && (tfp_group_rw != 0 )) + tfp_group_inited = 1; + } + +out: + return(error); +} + +SYSCTL_NODE(_kern, KERN_TFP, tfp, CTLFLAG_RW, 0, "tfp"); +SYSCTL_PROC(_kern_tfp, KERN_TFP_POLICY, policy, CTLTYPE_INT | CTLFLAG_RW, + &tfp_policy, sizeof(uint32_t), &sysctl_settfp_policy ,"I","policy"); +SYSCTL_PROC(_kern_tfp, KERN_TFP_READ_GROUP, read_group, CTLTYPE_INT | CTLFLAG_RW, + &tfp_group_ronly, sizeof(uint32_t), &sysctl_settfp_groups ,"I","read_group"); +SYSCTL_PROC(_kern_tfp, KERN_TFP_RW_GROUP, rw_group, CTLTYPE_INT | CTLFLAG_RW, + &tfp_group_rw, sizeof(uint32_t), &sysctl_settfp_groups ,"I","rw_group"); + + +SYSCTL_INT(_vm, OID_AUTO, shared_region_trace_level, CTLFLAG_RW, &shared_region_trace_level, 0, ""); /* * shared_region_make_private_np: @@ -465,6 +724,12 @@ shared_region_make_private_np( range_count = uap->rangeCount; user_ranges = uap->ranges; + SHARED_REGION_TRACE( + SHARED_REGION_TRACE_INFO, + ("shared_region: %p [%d(%s)] " + "make_private(rangecount=%d)\n", + current_thread(), p->p_pid, p->p_comm, range_count)); + /* allocate kernel space for the "ranges" */ if (range_count != 0) { kr = kmem_alloc(kernel_map, @@ -545,7 +810,18 @@ shared_region_make_private_np( range_count * sizeof (ranges[0])); ranges = NULL; } - + + SHARED_REGION_TRACE( + SHARED_REGION_TRACE_INFO, + ("shared_region: %p [%d(%s)] " + "make_private(rangecount=%d) -> %d " + "shared_region=%p[%x,%x,%x]\n", + current_thread(), p->p_pid, p->p_comm, + range_count, error, shared_region, + task_mapping_info.fs_base, + task_mapping_info.system, + task_mapping_info.flags)); + return error; } @@ -607,17 +883,33 @@ shared_region_map_file_np( /* get file 
structure from file descriptor */ error = fp_lookup(p, fd, &fp, 0); if (error) { + SHARED_REGION_TRACE( + SHARED_REGION_TRACE_ERROR, + ("shared_region: %p [%d(%s)] map_file: " + "fd=%d lookup failed (error=%d)\n", + current_thread(), p->p_pid, p->p_comm, fd, error)); goto done; } /* make sure we're attempting to map a vnode */ if (fp->f_fglob->fg_type != DTYPE_VNODE) { + SHARED_REGION_TRACE( + SHARED_REGION_TRACE_ERROR, + ("shared_region: %p [%d(%s)] map_file: " + "fd=%d not a vnode (type=%d)\n", + current_thread(), p->p_pid, p->p_comm, + fd, fp->f_fglob->fg_type)); error = EINVAL; goto done; } /* we need at least read permission on the file */ if (! (fp->f_fglob->fg_flag & FREAD)) { + SHARED_REGION_TRACE( + SHARED_REGION_TRACE_ERROR, + ("shared_region: %p [%d(%s)] map_file: " + "fd=%d not readable\n", + current_thread(), p->p_pid, p->p_comm, fd)); error = EPERM; goto done; } @@ -625,12 +917,23 @@ shared_region_map_file_np( /* get vnode from file structure */ error = vnode_getwithref((vnode_t)fp->f_fglob->fg_data); if (error) { + SHARED_REGION_TRACE( + SHARED_REGION_TRACE_ERROR, + ("shared_region: %p [%d(%s)] map_file: " + "fd=%d getwithref failed (error=%d)\n", + current_thread(), p->p_pid, p->p_comm, fd, error)); goto done; } vp = (struct vnode *) fp->f_fglob->fg_data; /* make sure the vnode is a regular file */ if (vp->v_type != VREG) { + SHARED_REGION_TRACE( + SHARED_REGION_TRACE_ERROR, + ("shared_region: %p [%d(%s)] map_file(%p:'%s'): " + "not a file (type=%d)\n", + current_thread(), p->p_pid, p->p_comm, + vp, vp->v_name, vp->v_type)); error = EINVAL; goto done; } @@ -641,8 +944,16 @@ shared_region_map_file_np( context.vc_proc = p; context.vc_ucred = kauth_cred_get(); - if ((error = vnode_size(vp, &fs, &context)) != 0) + if ((error = vnode_size(vp, &fs, &context)) != 0) { + SHARED_REGION_TRACE( + SHARED_REGION_TRACE_ERROR, + ("shared_region: %p [%d(%s)] " + "map_file(%p:'%s'): " + "vnode_size(%p) failed (error=%d)\n", + current_thread(), p->p_pid, p->p_comm, + 
vp, vp->v_name, vp)); goto done; + } file_size = fs; } @@ -651,6 +962,12 @@ shared_region_map_file_np( */ mapping_count = uap->mappingCount; /* the number of mappings */ if (mapping_count == 0) { + SHARED_REGION_TRACE( + SHARED_REGION_TRACE_INFO, + ("shared_region: %p [%d(%s)] map_file(%p:'%s'): " + "no mappings\n", + current_thread(), p->p_pid, p->p_comm, + vp, vp->v_name)); error = 0; /* no mappings: we're done ! */ goto done; } else if (mapping_count <= SFM_MAX_STACK) { @@ -661,6 +978,13 @@ shared_region_map_file_np( (vm_size_t) (mapping_count * sizeof (mappings[0]))); if (kr != KERN_SUCCESS) { + SHARED_REGION_TRACE( + SHARED_REGION_TRACE_ERROR, + ("shared_region: %p [%d(%s)] " + "map_file(%p:'%s'): " + "failed to allocate %d mappings (kr=0x%x)\n", + current_thread(), p->p_pid, p->p_comm, + vp, vp->v_name, mapping_count, kr)); error = ENOMEM; goto done; } @@ -671,6 +995,12 @@ shared_region_map_file_np( mappings, (mapping_count * sizeof (mappings[0]))); if (error != 0) { + SHARED_REGION_TRACE( + SHARED_REGION_TRACE_ERROR, + ("shared_region: %p [%d(%s)] map_file(%p:'%s'): " + "failed to copyin %d mappings (error=%d)\n", + current_thread(), p->p_pid, p->p_comm, + vp, vp->v_name, mapping_count, error)); goto done; } @@ -699,6 +1029,15 @@ shared_region_map_file_np( /* this mapping is not in the shared region... */ if (user_slide_p == NULL) { /* ... 
and we can't slide it in: fail */ + SHARED_REGION_TRACE( + SHARED_REGION_TRACE_CONFLICT, + ("shared_region: %p [%d(%s)] " + "map_file(%p:'%s'): " + "mapping %p not in shared segment & " + "no sliding\n", + current_thread(), p->p_pid, p->p_comm, + vp, vp->v_name, + mappings[j].sfm_address)); error = EINVAL; goto done; } @@ -707,6 +1046,15 @@ shared_region_map_file_np( mappings_in_segment = FALSE; } else if (mappings_in_segment != FALSE) { /* other mappings were not outside: fail */ + SHARED_REGION_TRACE( + SHARED_REGION_TRACE_CONFLICT, + ("shared_region: %p [%d(%s)] " + "map_file(%p:'%s'): " + "mapping %p not in shared segment & " + "other mappings in shared segment\n", + current_thread(), p->p_pid, p->p_comm, + vp, vp->v_name, + mappings[j].sfm_address)); error = EINVAL; goto done; } @@ -717,6 +1065,15 @@ shared_region_map_file_np( mappings_in_segment = TRUE; } else if (mappings_in_segment != TRUE) { /* other mappings were not inside: fail */ + SHARED_REGION_TRACE( + SHARED_REGION_TRACE_CONFLICT, + ("shared_region: %p [%d(%s)] " + "map_file(%p:'%s'): " + "mapping %p in shared segment & " + "others in shared segment\n", + current_thread(), p->p_pid, p->p_comm, + vp, vp->v_name, + mappings[j].sfm_address)); error = EINVAL; goto done; } @@ -760,6 +1117,12 @@ shared_region_map_file_np( UBCINFOCHECK("shared_region_map_file_np", vp); file_control = ubc_getobject(vp, UBC_HOLDOBJECT); if (file_control == MEMORY_OBJECT_CONTROL_NULL) { + SHARED_REGION_TRACE( + SHARED_REGION_TRACE_ERROR, + ("shared_region: %p [%d(%s)] map_file(%p:'%s'): " + "ubc_getobject() failed\n", + current_thread(), p->p_pid, p->p_comm, + vp, vp->v_name)); error = EINVAL; goto done; } @@ -808,6 +1171,13 @@ shared_region_map_file_np( * (via shared_region_make_private()) the shared region and * try to establish the mapping privately for this process. 
*/ + SHARED_REGION_TRACE( + SHARED_REGION_TRACE_CONFLICT, + ("shared_region: %p [%d(%s)] " + "map_file(%p:'%s'): " + "not on root volume\n", + current_thread(), p->p_pid, p->p_comm, + vp->v_name)); error = EXDEV; goto done; } @@ -824,8 +1194,7 @@ shared_region_map_file_np( base_offset, (user_slide_p) ? &slide : NULL); - switch (kr) { - case KERN_SUCCESS: + if (kr == KERN_SUCCESS) { /* * The mapping was successful. Let the buffer cache know * that we've mapped that file with these protections. This @@ -833,21 +1202,30 @@ shared_region_map_file_np( */ (void) ubc_map(vp, max_prot); error = 0; - break; - case KERN_INVALID_ADDRESS: - error = EFAULT; - goto done; - case KERN_PROTECTION_FAILURE: - error = EPERM; - goto done; - case KERN_NO_SPACE: - error = ENOMEM; - goto done; - case KERN_FAILURE: - case KERN_INVALID_ARGUMENT: - default: - error = EINVAL; - goto done; + } else { + SHARED_REGION_TRACE( + SHARED_REGION_TRACE_CONFLICT, + ("shared_region: %p [%d(%s)] " + "map_file(%p:'%s'): " + "map_shared_file failed, kr=0x%x\n", + current_thread(), p->p_pid, p->p_comm, + vp, vp->v_name, kr)); + switch (kr) { + case KERN_INVALID_ADDRESS: + error = EFAULT; + goto done; + case KERN_PROTECTION_FAILURE: + error = EPERM; + goto done; + case KERN_NO_SPACE: + error = ENOMEM; + goto done; + case KERN_FAILURE: + case KERN_INVALID_ARGUMENT: + default: + error = EINVAL; + goto done; + } } if (p->p_flag & P_NOSHLIB) { @@ -876,7 +1254,16 @@ shared_region_map_file_np( } error = copyout(&slide, user_slide_p, - sizeof (int64_t)); + sizeof (slide)); + if (slide != 0) { + SHARED_REGION_TRACE( + SHARED_REGION_TRACE_CONFLICT, + ("shared_region: %p [%d(%s)] " + "map_file(%p:'%s'): " + "slid by 0x%llx\n", + current_thread(), p->p_pid, p->p_comm, + vp, vp->v_name, slide)); + } } done: @@ -1216,6 +1603,9 @@ reset_shared_file(__unused struct proc *p, struct reset_shared_file_args *uap, vm_offset_t map_address; int i; kern_return_t kret; + shared_region_mapping_t shared_region; + struct 
shared_region_task_mappings task_mapping_info; + shared_region_mapping_t next; AUDIT_ARG(addr, CAST_DOWN(user_addr_t, base_address)); /* Retrieve the base address */ @@ -1243,6 +1633,23 @@ reset_shared_file(__unused struct proc *p, struct reset_shared_file_args *uap, (vm_size_t)(map_cnt*sizeof(sf_mapping_t))); goto rsf_bailout; } + + vm_get_shared_region(current_task(), &shared_region); + task_mapping_info.self = (vm_offset_t) shared_region; + shared_region_mapping_info(shared_region, + &(task_mapping_info.text_region), + &(task_mapping_info.text_size), + &(task_mapping_info.data_region), + &(task_mapping_info.data_size), + &(task_mapping_info.region_mappings), + &(task_mapping_info.client_base), + &(task_mapping_info.alternate_base), + &(task_mapping_info.alternate_next), + &(task_mapping_info.fs_base), + &(task_mapping_info.system), + &(task_mapping_info.flags), + &next); + for (i = 0; iv_name; + return KERN_SUCCESS; +} + pager_return_t vnode_pageout(struct vnode *vp, upl_t upl, diff --git a/bsd/vm/vnode_pager.h b/bsd/vm/vnode_pager.h index 7aea26deb..6db5fcb71 100644 --- a/bsd/vm/vnode_pager.h +++ b/bsd/vm/vnode_pager.h @@ -133,6 +133,15 @@ pager_return_t vnode_pageout(struct vnode *, upl_t, extern vm_object_offset_t vnode_pager_get_filesize( struct vnode *vp); +extern kern_return_t vnode_pager_get_pathname( + struct vnode *vp, + char *pathname, + vm_size_t *length_p); + +extern kern_return_t vnode_pager_get_filename( + struct vnode *vp, + char **filename); + #endif /* KERNEL */ #endif /* _VNODE_PAGER_ */ diff --git a/config/BSDKernel.exports b/config/BSDKernel.exports index eea8f21d8..43d98269d 100644 --- a/config/BSDKernel.exports +++ b/config/BSDKernel.exports @@ -124,7 +124,6 @@ _err_bwrite _err_close _err_copyfile _err_create -_err_devblocksize _err_exchange _err_fsync _err_getattr @@ -408,7 +407,6 @@ _nop_bwrite _nop_close _nop_copyfile _nop_create -_nop_devblocksize _nop_exchange _nop_fsync _nop_getattr diff --git a/config/IOKit.exports 
b/config/IOKit.exports index 4729bf597..6cfdf9312 100644 --- a/config/IOKit.exports +++ b/config/IOKit.exports @@ -164,9 +164,9 @@ __ZN10IOWorkLoop10wakeupGateEPvb __ZN10IOWorkLoop12tryCloseGateEv __ZN10IOWorkLoop13_maintRequestEPvS0_S0_S0_ __ZN10IOWorkLoop14addEventSourceEP13IOEventSource +__ZN10IOWorkLoop15runEventSourcesEv __ZN10IOWorkLoop17removeEventSourceEP13IOEventSource __ZN10IOWorkLoop19signalWorkAvailableEv -__ZN10IOWorkLoop20_RESERVEDIOWorkLoop1Ev __ZN10IOWorkLoop20_RESERVEDIOWorkLoop2Ev __ZN10IOWorkLoop20_RESERVEDIOWorkLoop3Ev __ZN10IOWorkLoop20_RESERVEDIOWorkLoop4Ev @@ -261,6 +261,49 @@ __ZN11IOResourcesC2EPK11OSMetaClass __ZN11IOResourcesC2Ev __ZN11IOResourcesD0Ev __ZN11IOResourcesD2Ev +__ZN12IODMACommand10gMetaClassE +__ZN12IODMACommand10superClassE +__ZN12IODMACommand11OutputBig32EPS_NS_9Segment64EPvm +__ZN12IODMACommand11OutputBig64EPS_NS_9Segment64EPvm +__ZN12IODMACommand11synchronizeEm +__ZN12IODMACommand12OutputHost32EPS_NS_9Segment64EPvm +__ZN12IODMACommand12OutputHost64EPS_NS_9Segment64EPvm +__ZN12IODMACommand12cloneCommandEPv +__ZN12IODMACommand14OutputLittle32EPS_NS_9Segment64EPvm +__ZN12IODMACommand14OutputLittle64EPS_NS_9Segment64EPvm +__ZN12IODMACommand15genIOVMSegmentsEPyPvPm +__ZN12IODMACommand17withSpecificationEPFbPS_NS_9Segment64EPvmEhyNS_14MappingOptionsEymP8IOMapperS2_ +__ZN12IODMACommand19setMemoryDescriptorEPK18IOMemoryDescriptorb +__ZN12IODMACommand21clearMemoryDescriptorEb +__ZN12IODMACommand21initWithSpecificationEPFbPS_NS_9Segment64EPvmEhyNS_14MappingOptionsEymP8IOMapperS2_ +__ZN12IODMACommand22_RESERVEDIODMACommand0Ev +__ZN12IODMACommand22_RESERVEDIODMACommand1Ev +__ZN12IODMACommand22_RESERVEDIODMACommand2Ev +__ZN12IODMACommand22_RESERVEDIODMACommand3Ev +__ZN12IODMACommand22_RESERVEDIODMACommand4Ev +__ZN12IODMACommand22_RESERVEDIODMACommand5Ev +__ZN12IODMACommand22_RESERVEDIODMACommand6Ev +__ZN12IODMACommand22_RESERVEDIODMACommand7Ev +__ZN12IODMACommand22_RESERVEDIODMACommand8Ev +__ZN12IODMACommand22_RESERVEDIODMACommand9Ev 
+__ZN12IODMACommand23_RESERVEDIODMACommand10Ev +__ZN12IODMACommand23_RESERVEDIODMACommand11Ev +__ZN12IODMACommand23_RESERVEDIODMACommand12Ev +__ZN12IODMACommand23_RESERVEDIODMACommand13Ev +__ZN12IODMACommand23_RESERVEDIODMACommand14Ev +__ZN12IODMACommand23_RESERVEDIODMACommand15Ev +__ZN12IODMACommand4freeEv +__ZN12IODMACommand7prepareEyybb +__ZN12IODMACommand8completeEbb +__ZN12IODMACommand9MetaClassC1Ev +__ZN12IODMACommand9MetaClassC2Ev +__ZN12IODMACommand9metaClassE +__ZN12IODMACommandC1EPK11OSMetaClass +__ZN12IODMACommandC1Ev +__ZN12IODMACommandC2EPK11OSMetaClass +__ZN12IODMACommandC2Ev +__ZN12IODMACommandD0Ev +__ZN12IODMACommandD2Ev __ZN12IOPMinformee10gMetaClassE __ZN12IOPMinformee10initializeEP9IOService __ZN12IOPMinformee10superClassE @@ -334,6 +377,8 @@ __ZN12IOUserClient26removeMappingForDescriptorEP18IOMemoryDescriptor __ZN12IOUserClient30getExternalAsyncMethodForIndexEm __ZN12IOUserClient31getAsyncTargetAndMethodForIndexEPP9IOServicem __ZN12IOUserClient4freeEv +__ZN12IOUserClient4initEP12OSDictionary +__ZN12IOUserClient4initEv __ZN12IOUserClient9MetaClassC1Ev __ZN12IOUserClient9MetaClassC2Ev __ZN12IOUserClient9metaClassE @@ -369,6 +414,7 @@ __ZN13IOCommandGate10gMetaClassE __ZN13IOCommandGate10runCommandEPvS0_S0_S0_ __ZN13IOCommandGate10superClassE __ZN13IOCommandGate11commandGateEP8OSObjectPFiS1_PvS2_S2_S2_E +__ZN13IOCommandGate11setWorkLoopEP10IOWorkLoop __ZN13IOCommandGate12checkForWorkEv __ZN13IOCommandGate12commandSleepEPvm __ZN13IOCommandGate13attemptActionEPFiP8OSObjectPvS2_S2_S2_ES2_S2_S2_S2_ @@ -382,7 +428,10 @@ __ZN13IOCommandGate23_RESERVEDIOCommandGate4Ev __ZN13IOCommandGate23_RESERVEDIOCommandGate5Ev __ZN13IOCommandGate23_RESERVEDIOCommandGate6Ev __ZN13IOCommandGate23_RESERVEDIOCommandGate7Ev +__ZN13IOCommandGate4freeEv __ZN13IOCommandGate4initEP8OSObjectPFiS1_PvS2_S2_S2_E +__ZN13IOCommandGate6enableEv +__ZN13IOCommandGate7disableEv __ZN13IOCommandGate9MetaClassC1Ev __ZN13IOCommandGate9MetaClassC2Ev __ZN13IOCommandGate9metaClassE @@ -506,9 
+555,11 @@ __ZN14IOPMrootDomain12tellChangeUpEm __ZN14IOPMrootDomain12unIdleDeviceEP9IOServicem __ZN14IOPMrootDomain12wakeFromDozeEv __ZN14IOPMrootDomain13askChangeDownEm +__ZN14IOPMrootDomain13copyPMSettingEP8OSSymbol __ZN14IOPMrootDomain13restartSystemEv __ZN14IOPMrootDomain13setPropertiesEP8OSObject __ZN14IOPMrootDomain14publishFeatureEPKc +__ZN14IOPMrootDomain14publishFeatureEPKcjPj __ZN14IOPMrootDomain14shutdownSystemEv __ZN14IOPMrootDomain14tellChangeDownEm __ZN14IOPMrootDomain15powerChangeDoneEm @@ -522,6 +573,7 @@ __ZN14IOPMrootDomain17setSleepSupportedEm __ZN14IOPMrootDomain18changePowerStateToEm __ZN14IOPMrootDomain19sysPowerDownHandlerEPvS0_mP9IOServiceS0_j __ZN14IOPMrootDomain22changePowerStateToPrivEm +__ZN14IOPMrootDomain22removePublishedFeatureEj __ZN14IOPMrootDomain23requestPowerDomainStateEmP17IOPowerConnectionm __ZN14IOPMrootDomain23setQuickSpinDownTimeoutEv __ZN14IOPMrootDomain24displayWranglerPublishedEPvS0_P9IOService @@ -530,7 +582,8 @@ __ZN14IOPMrootDomain25announcePowerSourceChangeEv __ZN14IOPMrootDomain26handleSleepTimerExpirationEv __ZN14IOPMrootDomain26restoreUserSpinDownTimeoutEv __ZN14IOPMrootDomain27displayWranglerNotificationEPvS0_mP9IOServiceS0_j -__ZN14IOPMrootDomain27registerPMSettingControllerEPFiiiPvES0_ +__ZN14IOPMrootDomain27registerPMSettingControllerEPPK8OSSymbolPFiP8OSObjectS2_S5_mES5_mPS5_ +__ZN14IOPMrootDomain27registerPMSettingControllerEPPK8OSSymboljPFiP8OSObjectS2_S5_mES5_mPS5_ __ZN14IOPMrootDomain39stopIgnoringClamshellEventsDuringWakeupEv __ZN14IOPMrootDomain5startEP9IOService __ZN14IOPMrootDomain9MetaClassC1Ev @@ -563,24 +616,54 @@ __ZN15IOConditionLockC2EPK11OSMetaClass __ZN15IOConditionLockC2Ev __ZN15IOConditionLockD0Ev __ZN15IOConditionLockD2Ev +__ZN15IOPMPowerSource10cycleCountEv __ZN15IOPMPowerSource10gMetaClassE __ZN15IOPMPowerSource10isChargingEv +__ZN15IOPMPowerSource10setVoltageEj __ZN15IOPMPowerSource10superClassE -__ZN15IOPMPowerSource11acConnectedEv +__ZN15IOPMPowerSource11adapterInfoEv 
__ZN15IOPMPowerSource11atWarnLevelEv -__ZN15IOPMPowerSource11curCapacityEv -__ZN15IOPMPowerSource11isInstalledEv __ZN15IOPMPowerSource11maxCapacityEv -__ZN15IOPMPowerSource12currentDrawnEv +__ZN15IOPMPowerSource11powerSourceEv +__ZN15IOPMPowerSource11setAmperageEi +__ZN15IOPMPowerSource11setLocationEi +__ZN15IOPMPowerSource12manufacturerEv __ZN15IOPMPowerSource12updateStatusEv +__ZN15IOPMPowerSource13setCycleCountEj +__ZN15IOPMPowerSource13setIsChargingEb __ZN15IOPMPowerSource13timeRemainingEv +__ZN15IOPMPowerSource14errorConditionEv +__ZN15IOPMPowerSource14setAdapterInfoEi +__ZN15IOPMPowerSource14setAtWarnLevelEb +__ZN15IOPMPowerSource14setMaxCapacityEj +__ZN15IOPMPowerSource15atCriticalLevelEv +__ZN15IOPMPowerSource15currentCapacityEv +__ZN15IOPMPowerSource15setManufacturerEP8OSSymbol +__ZN15IOPMPowerSource16batteryInstalledEv +__ZN15IOPMPowerSource16setTimeRemainingEi +__ZN15IOPMPowerSource17externalConnectedEv +__ZN15IOPMPowerSource17setErrorConditionEP8OSSymbol +__ZN15IOPMPowerSource18setAtCriticalLevelEb +__ZN15IOPMPowerSource18setCurrentCapacityEj +__ZN15IOPMPowerSource19legacyIOBatteryInfoEv +__ZN15IOPMPowerSource19setBatteryInstalledEb +__ZN15IOPMPowerSource20setExternalConnectedEb +__ZN15IOPMPowerSource21externalChargeCapableEv +__ZN15IOPMPowerSource22setLegacyIOBatteryInfoEP12OSDictionary __ZN15IOPMPowerSource24capacityPercentRemainingEv -__ZN15IOPMPowerSource4initEt +__ZN15IOPMPowerSource24setExternalChargeCapableEb +__ZN15IOPMPowerSource4freeEv +__ZN15IOPMPowerSource4initEv +__ZN15IOPMPowerSource5modelEv +__ZN15IOPMPowerSource6serialEv __ZN15IOPMPowerSource7voltageEv -__ZN15IOPMPowerSource8depletedEv +__ZN15IOPMPowerSource8amperageEv +__ZN15IOPMPowerSource8locationEv +__ZN15IOPMPowerSource8setModelEP8OSSymbol __ZN15IOPMPowerSource9MetaClassC1Ev __ZN15IOPMPowerSource9MetaClassC2Ev __ZN15IOPMPowerSource9metaClassE +__ZN15IOPMPowerSource9setSerialEP8OSSymbol __ZN15IOPMPowerSourceC1EPK11OSMetaClass __ZN15IOPMPowerSourceC1Ev 
__ZN15IOPMPowerSourceC2EPK11OSMetaClass @@ -817,11 +900,12 @@ __ZN18IOMemoryDescriptor13removeMappingEP11IOMemoryMap __ZN18IOMemoryDescriptor15initWithOptionsEPvmmP4taskmP8IOMapper __ZN18IOMemoryDescriptor16getSourceSegmentEmPm __ZN18IOMemoryDescriptor16performOperationEmmm +__ZN18IOMemoryDescriptor16withAddressRangeEyymP4task +__ZN18IOMemoryDescriptor17withAddressRangesEP14IOAddressRangemmP4task __ZN18IOMemoryDescriptor18getPhysicalAddressEv __ZN18IOMemoryDescriptor18withPhysicalRangesEP15IOPhysicalRangem11IODirectionb __ZN18IOMemoryDescriptor19withPhysicalAddressEmm11IODirection __ZN18IOMemoryDescriptor20getPhysicalSegment64EmPm -__ZN18IOMemoryDescriptor28_RESERVEDIOMemoryDescriptor5Ev __ZN18IOMemoryDescriptor28_RESERVEDIOMemoryDescriptor6Ev __ZN18IOMemoryDescriptor28_RESERVEDIOMemoryDescriptor7Ev __ZN18IOMemoryDescriptor28_RESERVEDIOMemoryDescriptor8Ev @@ -1085,6 +1169,7 @@ __ZN21IOSubMemoryDescriptor16getSourceSegmentEmPm __ZN21IOSubMemoryDescriptor16performOperationEmmm __ZN21IOSubMemoryDescriptor17getVirtualSegmentEmPm __ZN21IOSubMemoryDescriptor18getPhysicalSegmentEmPm +__ZN21IOSubMemoryDescriptor20getPhysicalSegment64EmPm __ZN21IOSubMemoryDescriptor22initWithPhysicalRangesEP15IOPhysicalRangem11IODirectionb __ZN21IOSubMemoryDescriptor23initWithPhysicalAddressEmm11IODirection __ZN21IOSubMemoryDescriptor4freeEv @@ -1187,9 +1272,10 @@ __ZN24IOBufferMemoryDescriptor15initWithAddressEjm11IODirectionP4task __ZN24IOBufferMemoryDescriptor15initWithOptionsEmjj __ZN24IOBufferMemoryDescriptor15initWithOptionsEmjjP4task __ZN24IOBufferMemoryDescriptor17inTaskWithOptionsEP4taskmjj +__ZN24IOBufferMemoryDescriptor20initWithPhysicalMaskEP4taskmyyy +__ZN24IOBufferMemoryDescriptor22inTaskWithPhysicalMaskEP4taskmyy __ZN24IOBufferMemoryDescriptor22initWithPhysicalRangesEP15IOPhysicalRangem11IODirectionb __ZN24IOBufferMemoryDescriptor23initWithPhysicalAddressEmm11IODirection -__ZN24IOBufferMemoryDescriptor34_RESERVEDIOBufferMemoryDescriptor1Ev 
__ZN24IOBufferMemoryDescriptor34_RESERVEDIOBufferMemoryDescriptor2Ev __ZN24IOBufferMemoryDescriptor34_RESERVEDIOBufferMemoryDescriptor3Ev __ZN24IOBufferMemoryDescriptor34_RESERVEDIOBufferMemoryDescriptor4Ev @@ -1229,6 +1315,7 @@ __ZN25IOGeneralMemoryDescriptor15unmapFromKernelEv __ZN25IOGeneralMemoryDescriptor16getSourceSegmentEmPm __ZN25IOGeneralMemoryDescriptor17getVirtualSegmentEmPm __ZN25IOGeneralMemoryDescriptor18getPhysicalSegmentEmPm +__ZN25IOGeneralMemoryDescriptor20getPhysicalSegment64EmPm __ZN25IOGeneralMemoryDescriptor22initWithPhysicalRangesEP15IOPhysicalRangem11IODirectionb __ZN25IOGeneralMemoryDescriptor23initWithPhysicalAddressEmm11IODirection __ZN25IOGeneralMemoryDescriptor4freeEv @@ -1350,7 +1437,6 @@ __ZN8IOMapper10superClassE __ZN8IOMapper11NewARTTableEmPPvPj __ZN8IOMapper12FreeARTTableEP6OSDatam __ZN8IOMapper17setMapperRequiredEb -__ZN8IOMapper18_RESERVEDIOMapper0Ev __ZN8IOMapper18_RESERVEDIOMapper1Ev __ZN8IOMapper18_RESERVEDIOMapper2Ev __ZN8IOMapper18_RESERVEDIOMapper3Ev @@ -1499,6 +1585,7 @@ __ZN9IOService15getDeviceMemoryEv __ZN9IOService15getPMRootDomainEv __ZN9IOService15instruct_driverEm __ZN9IOService15lookupInterruptEibPP21IOInterruptController +__ZN9IOService15nextIdleTimeoutE12UnsignedWideS0_j __ZN9IOService15powerChangeDoneEm __ZN9IOService15probeCandidatesEP12OSOrderedSet __ZN9IOService15publishResourceEPK8OSSymbolP8OSObject @@ -1522,6 +1609,7 @@ __ZN9IOService16applyToProvidersEPFvPS_PvES1_ __ZN9IOService16command_receivedEPvS0_S0_S0_ __ZN9IOService16didYouWakeSystemEv __ZN9IOService16disableInterruptEi +__ZN9IOService16getCPUSnoopDelayEv __ZN9IOService16getInterruptTypeEiPi __ZN9IOService16registerInterestEPK8OSSymbolPFiPvS3_mPS_S3_jES3_S3_ __ZN9IOService16removePowerChildEP17IOPowerConnection @@ -1530,6 +1618,7 @@ __ZN9IOService16resolveInterruptEPS_i __ZN9IOService16resourceMatchingEPK8OSStringP12OSDictionary __ZN9IOService16resourceMatchingEPKcP12OSDictionary __ZN9IOService16scheduleFinalizeEv +__ZN9IOService16setCPUSnoopDelayEm 
__ZN9IOService16startSettleTimerEm __ZN9IOService16start_our_changeEm __ZN9IOService16stringFromReturnEi @@ -1552,9 +1641,9 @@ __ZN9IOService18getResourceServiceEv __ZN9IOService18lockForArbitrationEb __ZN9IOService18matchPropertyTableEP12OSDictionary __ZN9IOService18matchPropertyTableEP12OSDictionaryPl +__ZN9IOService18requireMaxBusStallEm __ZN9IOService18setIdleTimerPeriodEm __ZN9IOService18settleTimerExpiredEv -__ZN9IOService15nextIdleTimeoutE12UnsignedWideS0_j __ZN9IOService19_RESERVEDIOService4Ev __ZN9IOService19_RESERVEDIOService5Ev __ZN9IOService19_RESERVEDIOService6Ev @@ -1611,22 +1700,6 @@ __ZN9IOService20_RESERVEDIOService44Ev __ZN9IOService20_RESERVEDIOService45Ev __ZN9IOService20_RESERVEDIOService46Ev __ZN9IOService20_RESERVEDIOService47Ev -__ZN9IOService20_RESERVEDIOService48Ev -__ZN9IOService20_RESERVEDIOService49Ev -__ZN9IOService20_RESERVEDIOService50Ev -__ZN9IOService20_RESERVEDIOService51Ev -__ZN9IOService20_RESERVEDIOService52Ev -__ZN9IOService20_RESERVEDIOService53Ev -__ZN9IOService20_RESERVEDIOService54Ev -__ZN9IOService20_RESERVEDIOService55Ev -__ZN9IOService20_RESERVEDIOService56Ev -__ZN9IOService20_RESERVEDIOService57Ev -__ZN9IOService20_RESERVEDIOService58Ev -__ZN9IOService20_RESERVEDIOService59Ev -__ZN9IOService20_RESERVEDIOService60Ev -__ZN9IOService20_RESERVEDIOService61Ev -__ZN9IOService20_RESERVEDIOService62Ev -__ZN9IOService20_RESERVEDIOService63Ev __ZN9IOService20callPlatformFunctionEPK8OSSymbolbPvS3_S3_S3_ __ZN9IOService20callPlatformFunctionEPKcbPvS2_S2_S2_ __ZN9IOService20getDeviceMemoryCountEv @@ -1745,6 +1818,9 @@ __ZNK11IOMemoryMap9MetaClass5allocEv __ZNK11IOResources11getWorkLoopEv __ZNK11IOResources12getMetaClassEv __ZNK11IOResources9MetaClass5allocEv +__ZNK12IODMACommand12getMetaClassEv +__ZNK12IODMACommand19getMemoryDescriptorEv +__ZNK12IODMACommand9MetaClass5allocEv __ZNK12IOPMinformee12getMetaClassEv __ZNK12IOPMinformee9MetaClass5allocEv __ZNK12IORootParent12getMetaClassEv @@ -1840,6 +1916,7 @@ 
__ZNK17IOPowerConnection12getMetaClassEv __ZNK17IOPowerConnection9MetaClass5allocEv __ZNK18IOMemoryDescriptor12getDirectionEv __ZNK18IOMemoryDescriptor12getMetaClassEv +__ZNK18IOMemoryDescriptor19dmaCommandOperationEmPvj __ZNK18IOMemoryDescriptor9MetaClass5allocEv __ZNK18IOMemoryDescriptor9getLengthEv __ZNK18IOPMchangeNoteList12getMetaClassEv @@ -1865,6 +1942,7 @@ __ZNK21IOInterruptController9MetaClass5allocEv __ZNK21IONaturalMemoryCursor12getMetaClassEv __ZNK21IONaturalMemoryCursor9MetaClass5allocEv __ZNK21IOSubMemoryDescriptor12getMetaClassEv +__ZNK21IOSubMemoryDescriptor19dmaCommandOperationEmPvj __ZNK21IOSubMemoryDescriptor9MetaClass5allocEv __ZNK21IOSubMemoryDescriptor9serializeEP11OSSerialize __ZNK22IOInterruptEventSource11getIntIndexEv @@ -1880,6 +1958,7 @@ __ZNK24IOBufferMemoryDescriptor11getCapacityEv __ZNK24IOBufferMemoryDescriptor12getMetaClassEv __ZNK24IOBufferMemoryDescriptor9MetaClass5allocEv __ZNK25IOGeneralMemoryDescriptor12getMetaClassEv +__ZNK25IOGeneralMemoryDescriptor19dmaCommandOperationEmPvj __ZNK25IOGeneralMemoryDescriptor9MetaClass5allocEv __ZNK25IOGeneralMemoryDescriptor9serializeEP11OSSerialize __ZNK25IOServiceUserNotification12getMetaClassEv @@ -1894,6 +1973,7 @@ __ZNK28IOFilterInterruptEventSource9MetaClass5allocEv __ZNK32IOServiceMessageUserNotification12getMetaClassEv __ZNK32IOServiceMessageUserNotification9MetaClass5allocEv __ZNK8IOMapper12getMetaClassEv +__ZNK8IOMapper13getBypassMaskEPy __ZNK8IOMapper9MetaClass5allocEv __ZNK8IOPMpriv12getMetaClassEv __ZNK8IOPMpriv9MetaClass5allocEv @@ -1926,6 +2006,7 @@ __ZTV11IOCatalogue __ZTV11IODataQueue __ZTV11IOMemoryMap __ZTV11IOResources +__ZTV12IODMACommand __ZTV12IOPMinformee __ZTV12IORootParent __ZTV12IOUserClient @@ -1984,6 +2065,7 @@ __ZTVN11IOCatalogue9MetaClassE __ZTVN11IODataQueue9MetaClassE __ZTVN11IOMemoryMap9MetaClassE __ZTVN11IOResources9MetaClassE +__ZTVN12IODMACommand9MetaClassE __ZTVN12IOPMinformee9MetaClassE __ZTVN12IORootParent9MetaClassE __ZTVN12IOUserClient9MetaClassE diff 
--git a/config/IOKit.ppc.exports b/config/IOKit.ppc.exports index e69de29bb..42d90d845 100644 --- a/config/IOKit.ppc.exports +++ b/config/IOKit.ppc.exports @@ -0,0 +1,39 @@ +__Z11IODBDMAStopPV23IODBDMAChannelRegisters +__Z12IODBDMAFlushPV23IODBDMAChannelRegisters +__Z12IODBDMAPausePV23IODBDMAChannelRegisters +__Z12IODBDMAResetPV23IODBDMAChannelRegisters +__Z12IODBDMAStartPV23IODBDMAChannelRegistersPV17IODBDMADescriptor +__Z15IODBDMAContinuePV23IODBDMAChannelRegisters +__ZN19IODBDMAMemoryCursor10gMetaClassE +__ZN19IODBDMAMemoryCursor10superClassE +__ZN19IODBDMAMemoryCursor17withSpecificationEmmm +__ZN19IODBDMAMemoryCursor21initWithSpecificationEmmm +__ZN19IODBDMAMemoryCursor9MetaClassC1Ev +__ZN19IODBDMAMemoryCursor9MetaClassC2Ev +__ZN19IODBDMAMemoryCursor9metaClassE +__ZN19IODBDMAMemoryCursorC1EPK11OSMetaClass +__ZN19IODBDMAMemoryCursorC1Ev +__ZN19IODBDMAMemoryCursorC2EPK11OSMetaClass +__ZN19IODBDMAMemoryCursorC2Ev +__ZN19IODBDMAMemoryCursorD0Ev +__ZN19IODBDMAMemoryCursorD2Ev +__ZN9IOService20_RESERVEDIOService48Ev +__ZN9IOService20_RESERVEDIOService49Ev +__ZN9IOService20_RESERVEDIOService50Ev +__ZN9IOService20_RESERVEDIOService51Ev +__ZN9IOService20_RESERVEDIOService52Ev +__ZN9IOService20_RESERVEDIOService53Ev +__ZN9IOService20_RESERVEDIOService54Ev +__ZN9IOService20_RESERVEDIOService55Ev +__ZN9IOService20_RESERVEDIOService56Ev +__ZN9IOService20_RESERVEDIOService57Ev +__ZN9IOService20_RESERVEDIOService58Ev +__ZN9IOService20_RESERVEDIOService59Ev +__ZN9IOService20_RESERVEDIOService60Ev +__ZN9IOService20_RESERVEDIOService61Ev +__ZN9IOService20_RESERVEDIOService62Ev +__ZN9IOService20_RESERVEDIOService63Ev +__ZNK19IODBDMAMemoryCursor12getMetaClassEv +__ZNK19IODBDMAMemoryCursor9MetaClass5allocEv +__ZTV19IODBDMAMemoryCursor +__ZTVN19IODBDMAMemoryCursor9MetaClassE diff --git a/config/Libkern.exports b/config/Libkern.exports index eeb7bfb60..1fad54e93 100644 --- a/config/Libkern.exports +++ b/config/Libkern.exports @@ -16,8 +16,6 @@ _OSCompareAndSwap _OSDecrementAtomic 
_OSDecrementAtomic16 _OSDecrementAtomic8 -_OSDequeueAtomic -_OSEnqueueAtomic _OSFree _OSIncrementAtomic _OSIncrementAtomic16 @@ -417,22 +415,6 @@ __ZN8OSObject19_RESERVEDOSObject12Ev __ZN8OSObject19_RESERVEDOSObject13Ev __ZN8OSObject19_RESERVEDOSObject14Ev __ZN8OSObject19_RESERVEDOSObject15Ev -__ZN8OSObject19_RESERVEDOSObject16Ev -__ZN8OSObject19_RESERVEDOSObject17Ev -__ZN8OSObject19_RESERVEDOSObject18Ev -__ZN8OSObject19_RESERVEDOSObject19Ev -__ZN8OSObject19_RESERVEDOSObject20Ev -__ZN8OSObject19_RESERVEDOSObject21Ev -__ZN8OSObject19_RESERVEDOSObject22Ev -__ZN8OSObject19_RESERVEDOSObject23Ev -__ZN8OSObject19_RESERVEDOSObject24Ev -__ZN8OSObject19_RESERVEDOSObject25Ev -__ZN8OSObject19_RESERVEDOSObject26Ev -__ZN8OSObject19_RESERVEDOSObject27Ev -__ZN8OSObject19_RESERVEDOSObject28Ev -__ZN8OSObject19_RESERVEDOSObject29Ev -__ZN8OSObject19_RESERVEDOSObject30Ev -__ZN8OSObject19_RESERVEDOSObject31Ev __ZN8OSObject4freeEv __ZN8OSObject4initEv __ZN8OSObject9MetaClassC1Ev diff --git a/config/Libkern.ppc.exports b/config/Libkern.ppc.exports index 9b1bdcf7a..df175fdcc 100644 --- a/config/Libkern.ppc.exports +++ b/config/Libkern.ppc.exports @@ -1,2 +1,21 @@ +_OSDequeueAtomic +_OSEnqueueAtomic +__ZN8OSObject19_RESERVEDOSObject16Ev +__ZN8OSObject19_RESERVEDOSObject17Ev +__ZN8OSObject19_RESERVEDOSObject18Ev +__ZN8OSObject19_RESERVEDOSObject19Ev +__ZN8OSObject19_RESERVEDOSObject20Ev +__ZN8OSObject19_RESERVEDOSObject21Ev +__ZN8OSObject19_RESERVEDOSObject22Ev +__ZN8OSObject19_RESERVEDOSObject23Ev +__ZN8OSObject19_RESERVEDOSObject24Ev +__ZN8OSObject19_RESERVEDOSObject25Ev +__ZN8OSObject19_RESERVEDOSObject26Ev +__ZN8OSObject19_RESERVEDOSObject27Ev +__ZN8OSObject19_RESERVEDOSObject28Ev +__ZN8OSObject19_RESERVEDOSObject29Ev +__ZN8OSObject19_RESERVEDOSObject30Ev +__ZN8OSObject19_RESERVEDOSObject31Ev _bcopy_nc _bzero_nc + diff --git a/config/MasterVersion b/config/MasterVersion index 646e0cd0f..a7fc9864a 100644 --- a/config/MasterVersion +++ b/config/MasterVersion @@ -1,4 +1,4 @@ -8.7.0 +8.7.2 
# The first line of this file contains the master version number for the kernel. # All other instances of the kernel version in xnu are derived from this file. diff --git a/config/System6.0.exports b/config/System6.0.exports index 1815d9dfb..a3ad829d3 100644 --- a/config/System6.0.exports +++ b/config/System6.0.exports @@ -128,8 +128,6 @@ _OSCompareAndSwap _OSDecrementAtomic _OSDecrementAtomic16 _OSDecrementAtomic8 -_OSDequeueAtomic -_OSEnqueueAtomic _OSIncrementAtomic _OSIncrementAtomic16 _OSIncrementAtomic8 @@ -250,9 +248,9 @@ __ZN10IOWorkLoop10wakeupGateEPvb __ZN10IOWorkLoop12tryCloseGateEv __ZN10IOWorkLoop13_maintRequestEPvS0_S0_S0_ __ZN10IOWorkLoop14addEventSourceEP13IOEventSource +__ZN10IOWorkLoop15runEventSourcesEv __ZN10IOWorkLoop17removeEventSourceEP13IOEventSource __ZN10IOWorkLoop19signalWorkAvailableEv -__ZN10IOWorkLoop20_RESERVEDIOWorkLoop1Ev __ZN10IOWorkLoop20_RESERVEDIOWorkLoop2Ev __ZN10IOWorkLoop20_RESERVEDIOWorkLoop3Ev __ZN10IOWorkLoop20_RESERVEDIOWorkLoop4Ev @@ -495,6 +493,8 @@ __ZN12IOUserClient26removeMappingForDescriptorEP18IOMemoryDescriptor __ZN12IOUserClient30getExternalAsyncMethodForIndexEm __ZN12IOUserClient31getAsyncTargetAndMethodForIndexEPP9IOServicem __ZN12IOUserClient4freeEv +__ZN12IOUserClient4initEP12OSDictionary +__ZN12IOUserClient4initEv __ZN12IOUserClient9MetaClassC1Ev __ZN12IOUserClient9MetaClassC2Ev __ZN12IOUserClient9metaClassE @@ -654,6 +654,7 @@ __ZN13IOCommandGate10gMetaClassE __ZN13IOCommandGate10runCommandEPvS0_S0_S0_ __ZN13IOCommandGate10superClassE __ZN13IOCommandGate11commandGateEP8OSObjectPFiS1_PvS2_S2_S2_E +__ZN13IOCommandGate11setWorkLoopEP10IOWorkLoop __ZN13IOCommandGate12checkForWorkEv __ZN13IOCommandGate12commandSleepEPvm __ZN13IOCommandGate13attemptActionEPFiP8OSObjectPvS2_S2_S2_ES2_S2_S2_S2_ @@ -667,7 +668,10 @@ __ZN13IOCommandGate23_RESERVEDIOCommandGate4Ev __ZN13IOCommandGate23_RESERVEDIOCommandGate5Ev __ZN13IOCommandGate23_RESERVEDIOCommandGate6Ev __ZN13IOCommandGate23_RESERVEDIOCommandGate7Ev 
+__ZN13IOCommandGate4freeEv __ZN13IOCommandGate4initEP8OSObjectPFiS1_PvS2_S2_S2_E +__ZN13IOCommandGate6enableEv +__ZN13IOCommandGate7disableEv __ZN13IOCommandGate9MetaClassC1Ev __ZN13IOCommandGate9MetaClassC2Ev __ZN13IOCommandGate9metaClassE @@ -815,7 +819,6 @@ __ZN14IOPMrootDomain25announcePowerSourceChangeEv __ZN14IOPMrootDomain26handleSleepTimerExpirationEv __ZN14IOPMrootDomain26restoreUserSpinDownTimeoutEv __ZN14IOPMrootDomain27displayWranglerNotificationEPvS0_mP9IOServiceS0_j -__ZN14IOPMrootDomain27registerPMSettingControllerEPFiiiPvES0_ __ZN14IOPMrootDomain39stopIgnoringClamshellEventsDuringWakeupEv __ZN14IOPMrootDomain5startEP9IOService __ZN14IOPMrootDomain9MetaClassC1Ev @@ -848,30 +851,6 @@ __ZN15IOConditionLockC2EPK11OSMetaClass __ZN15IOConditionLockC2Ev __ZN15IOConditionLockD0Ev __ZN15IOConditionLockD2Ev -__ZN15IOPMPowerSource10gMetaClassE -__ZN15IOPMPowerSource10isChargingEv -__ZN15IOPMPowerSource10superClassE -__ZN15IOPMPowerSource11acConnectedEv -__ZN15IOPMPowerSource11atWarnLevelEv -__ZN15IOPMPowerSource11curCapacityEv -__ZN15IOPMPowerSource11isInstalledEv -__ZN15IOPMPowerSource11maxCapacityEv -__ZN15IOPMPowerSource12currentDrawnEv -__ZN15IOPMPowerSource12updateStatusEv -__ZN15IOPMPowerSource13timeRemainingEv -__ZN15IOPMPowerSource24capacityPercentRemainingEv -__ZN15IOPMPowerSource4initEt -__ZN15IOPMPowerSource7voltageEv -__ZN15IOPMPowerSource8depletedEv -__ZN15IOPMPowerSource9MetaClassC1Ev -__ZN15IOPMPowerSource9MetaClassC2Ev -__ZN15IOPMPowerSource9metaClassE -__ZN15IOPMPowerSourceC1EPK11OSMetaClass -__ZN15IOPMPowerSourceC1Ev -__ZN15IOPMPowerSourceC2EPK11OSMetaClass -__ZN15IOPMPowerSourceC2Ev -__ZN15IOPMPowerSourceD0Ev -__ZN15IOPMPowerSourceD2Ev __ZN15IOPanicPlatform10gMetaClassE __ZN15IOPanicPlatform10superClassE __ZN15IOPanicPlatform5startEP9IOService @@ -1258,7 +1237,6 @@ __ZN18IOMemoryDescriptor18getPhysicalAddressEv __ZN18IOMemoryDescriptor18withPhysicalRangesEP15IOPhysicalRangem11IODirectionb 
__ZN18IOMemoryDescriptor19withPhysicalAddressEmm11IODirection __ZN18IOMemoryDescriptor20getPhysicalSegment64EmPm -__ZN18IOMemoryDescriptor28_RESERVEDIOMemoryDescriptor5Ev __ZN18IOMemoryDescriptor28_RESERVEDIOMemoryDescriptor6Ev __ZN18IOMemoryDescriptor28_RESERVEDIOMemoryDescriptor7Ev __ZN18IOMemoryDescriptor28_RESERVEDIOMemoryDescriptor8Ev @@ -1539,6 +1517,7 @@ __ZN21IOSubMemoryDescriptor16getSourceSegmentEmPm __ZN21IOSubMemoryDescriptor16performOperationEmmm __ZN21IOSubMemoryDescriptor17getVirtualSegmentEmPm __ZN21IOSubMemoryDescriptor18getPhysicalSegmentEmPm +__ZN21IOSubMemoryDescriptor20getPhysicalSegment64EmPm __ZN21IOSubMemoryDescriptor22initWithPhysicalRangesEP15IOPhysicalRangem11IODirectionb __ZN21IOSubMemoryDescriptor23initWithPhysicalAddressEmm11IODirection __ZN21IOSubMemoryDescriptor4freeEv @@ -1658,9 +1637,10 @@ __ZN24IOBufferMemoryDescriptor15initWithAddressEjm11IODirectionP4task __ZN24IOBufferMemoryDescriptor15initWithOptionsEmjj __ZN24IOBufferMemoryDescriptor15initWithOptionsEmjjP4task __ZN24IOBufferMemoryDescriptor17inTaskWithOptionsEP4taskmjj +__ZN24IOBufferMemoryDescriptor20initWithPhysicalMaskEP4taskmyyy +__ZN24IOBufferMemoryDescriptor22inTaskWithPhysicalMaskEP4taskmyy __ZN24IOBufferMemoryDescriptor22initWithPhysicalRangesEP15IOPhysicalRangem11IODirectionb __ZN24IOBufferMemoryDescriptor23initWithPhysicalAddressEmm11IODirection -__ZN24IOBufferMemoryDescriptor34_RESERVEDIOBufferMemoryDescriptor1Ev __ZN24IOBufferMemoryDescriptor34_RESERVEDIOBufferMemoryDescriptor2Ev __ZN24IOBufferMemoryDescriptor34_RESERVEDIOBufferMemoryDescriptor3Ev __ZN24IOBufferMemoryDescriptor34_RESERVEDIOBufferMemoryDescriptor4Ev @@ -1727,6 +1707,7 @@ __ZN25IOGeneralMemoryDescriptor15unmapFromKernelEv __ZN25IOGeneralMemoryDescriptor16getSourceSegmentEmPm __ZN25IOGeneralMemoryDescriptor17getVirtualSegmentEmPm __ZN25IOGeneralMemoryDescriptor18getPhysicalSegmentEmPm +__ZN25IOGeneralMemoryDescriptor20getPhysicalSegment64EmPm 
__ZN25IOGeneralMemoryDescriptor22initWithPhysicalRangesEP15IOPhysicalRangem11IODirectionb __ZN25IOGeneralMemoryDescriptor23initWithPhysicalAddressEmm11IODirection __ZN25IOGeneralMemoryDescriptor4freeEv @@ -1950,8 +1931,8 @@ __ZN7OSArray12removeObjectEj __ZN7OSArray12withCapacityEj __ZN7OSArray13initWithArrayEPKS_j __ZN7OSArray13replaceObjectEjPK15OSMetaClassBase -__ZN7OSArray14ensureCapacityEj __ZN7OSArray14copyCollectionEP12OSDictionary +__ZN7OSArray14ensureCapacityEj __ZN7OSArray15flushCollectionEv __ZN7OSArray15initWithObjectsEPPK8OSObjectjj __ZN7OSArray16initWithCapacityEj @@ -1986,7 +1967,6 @@ __ZN8IOMapper10superClassE __ZN8IOMapper11NewARTTableEmPPvPj __ZN8IOMapper12FreeARTTableEP6OSDatam __ZN8IOMapper17setMapperRequiredEb -__ZN8IOMapper18_RESERVEDIOMapper0Ev __ZN8IOMapper18_RESERVEDIOMapper1Ev __ZN8IOMapper18_RESERVEDIOMapper2Ev __ZN8IOMapper18_RESERVEDIOMapper3Ev @@ -2097,22 +2077,6 @@ __ZN8OSObject19_RESERVEDOSObject12Ev __ZN8OSObject19_RESERVEDOSObject13Ev __ZN8OSObject19_RESERVEDOSObject14Ev __ZN8OSObject19_RESERVEDOSObject15Ev -__ZN8OSObject19_RESERVEDOSObject16Ev -__ZN8OSObject19_RESERVEDOSObject17Ev -__ZN8OSObject19_RESERVEDOSObject18Ev -__ZN8OSObject19_RESERVEDOSObject19Ev -__ZN8OSObject19_RESERVEDOSObject20Ev -__ZN8OSObject19_RESERVEDOSObject21Ev -__ZN8OSObject19_RESERVEDOSObject22Ev -__ZN8OSObject19_RESERVEDOSObject23Ev -__ZN8OSObject19_RESERVEDOSObject24Ev -__ZN8OSObject19_RESERVEDOSObject25Ev -__ZN8OSObject19_RESERVEDOSObject26Ev -__ZN8OSObject19_RESERVEDOSObject27Ev -__ZN8OSObject19_RESERVEDOSObject28Ev -__ZN8OSObject19_RESERVEDOSObject29Ev -__ZN8OSObject19_RESERVEDOSObject30Ev -__ZN8OSObject19_RESERVEDOSObject31Ev __ZN8OSObject4freeEv __ZN8OSObject4initEv __ZN8OSObject9MetaClassC1Ev @@ -2311,6 +2275,7 @@ __ZN9IOService15getDeviceMemoryEv __ZN9IOService15getPMRootDomainEv __ZN9IOService15instruct_driverEm __ZN9IOService15lookupInterruptEibPP21IOInterruptController +__ZN9IOService15nextIdleTimeoutE12UnsignedWideS0_j 
__ZN9IOService15powerChangeDoneEm __ZN9IOService15probeCandidatesEP12OSOrderedSet __ZN9IOService15publishResourceEPK8OSSymbolP8OSObject @@ -2366,7 +2331,6 @@ __ZN9IOService18matchPropertyTableEP12OSDictionary __ZN9IOService18matchPropertyTableEP12OSDictionaryPl __ZN9IOService18setIdleTimerPeriodEm __ZN9IOService18settleTimerExpiredEv -__ZN9IOService15nextIdleTimeoutE12UnsignedWideS0_j __ZN9IOService19_RESERVEDIOService4Ev __ZN9IOService19_RESERVEDIOService5Ev __ZN9IOService19_RESERVEDIOService6Ev @@ -2423,22 +2387,6 @@ __ZN9IOService20_RESERVEDIOService44Ev __ZN9IOService20_RESERVEDIOService45Ev __ZN9IOService20_RESERVEDIOService46Ev __ZN9IOService20_RESERVEDIOService47Ev -__ZN9IOService20_RESERVEDIOService48Ev -__ZN9IOService20_RESERVEDIOService49Ev -__ZN9IOService20_RESERVEDIOService50Ev -__ZN9IOService20_RESERVEDIOService51Ev -__ZN9IOService20_RESERVEDIOService52Ev -__ZN9IOService20_RESERVEDIOService53Ev -__ZN9IOService20_RESERVEDIOService54Ev -__ZN9IOService20_RESERVEDIOService55Ev -__ZN9IOService20_RESERVEDIOService56Ev -__ZN9IOService20_RESERVEDIOService57Ev -__ZN9IOService20_RESERVEDIOService58Ev -__ZN9IOService20_RESERVEDIOService59Ev -__ZN9IOService20_RESERVEDIOService60Ev -__ZN9IOService20_RESERVEDIOService61Ev -__ZN9IOService20_RESERVEDIOService62Ev -__ZN9IOService20_RESERVEDIOService63Ev __ZN9IOService20callPlatformFunctionEPK8OSSymbolbPvS3_S3_S3_ __ZN9IOService20callPlatformFunctionEPKcbPvS2_S2_S2_ __ZN9IOService20getDeviceMemoryCountEv @@ -2754,6 +2702,7 @@ __ZNK18IODTPlatformExpert14compareNubNameEPK9IOServiceP8OSStringPS4_ __ZNK18IODTPlatformExpert9MetaClass5allocEv __ZNK18IOMemoryDescriptor12getDirectionEv __ZNK18IOMemoryDescriptor12getMetaClassEv +__ZNK18IOMemoryDescriptor19dmaCommandOperationEmPvj __ZNK18IOMemoryDescriptor9MetaClass5allocEv __ZNK18IOMemoryDescriptor9getLengthEv __ZNK18IOPMchangeNoteList12getMetaClassEv @@ -2781,6 +2730,7 @@ __ZNK21IOInterruptController9MetaClass5allocEv __ZNK21IONaturalMemoryCursor12getMetaClassEv 
__ZNK21IONaturalMemoryCursor9MetaClass5allocEv __ZNK21IOSubMemoryDescriptor12getMetaClassEv +__ZNK21IOSubMemoryDescriptor19dmaCommandOperationEmPvj __ZNK21IOSubMemoryDescriptor9MetaClass5allocEv __ZNK21IOSubMemoryDescriptor9serializeEP11OSSerialize __ZNK22IOInterruptEventSource11getIntIndexEv @@ -2802,6 +2752,7 @@ __ZNK24IOBufferMemoryDescriptor9MetaClass5allocEv __ZNK24IOCPUInterruptController12getMetaClassEv __ZNK24IOCPUInterruptController9MetaClass5allocEv __ZNK25IOGeneralMemoryDescriptor12getMetaClassEv +__ZNK25IOGeneralMemoryDescriptor19dmaCommandOperationEmPvj __ZNK25IOGeneralMemoryDescriptor9MetaClass5allocEv __ZNK25IOGeneralMemoryDescriptor9serializeEP11OSSerialize __ZNK25IOServiceUserNotification12getMetaClassEv @@ -2860,6 +2811,7 @@ __ZNK7OSArray9isEqualToEPK15OSMetaClassBase __ZNK7OSArray9isEqualToEPKS_ __ZNK7OSArray9serializeEP11OSSerialize __ZNK8IOMapper12getMetaClassEv +__ZNK8IOMapper13getBypassMaskEPy __ZNK8IOMapper9MetaClass5allocEv __ZNK8IOPMpriv12getMetaClassEv __ZNK8IOPMpriv9MetaClass5allocEv @@ -3474,6 +3426,11 @@ _pexpert_version_minor:_version_minor _pexpert_version_variant:_version_variant _pmap_extract _pmap_find_phys +_pmsBuild +_pmsPark +_pmsRun +_pmsRunLocal +_pmsStart _print_vmpage_stat _printf _processor_exit diff --git a/config/System6.0.i386.exports b/config/System6.0.i386.exports index 5b8690450..b6968b6d1 100644 --- a/config/System6.0.i386.exports +++ b/config/System6.0.i386.exports @@ -1,332 +1,21 @@ -_Gdt -_Load_context -_PE_incoming_interrupt +_Cstate_table_set _PE_install_interrupt_handler _PE_interrupt_handler -_Thread_continue -__ZN15AppleIntelClock10gMetaClassE -__ZN15AppleIntelClock10superClassE -__ZN15AppleIntelClock5startEP9IOService -__ZN15AppleIntelClock9MetaClassC1Ev -__ZN15AppleIntelClock9MetaClassC2Ev -__ZN15AppleIntelClock9metaClassE -__ZN15AppleIntelClockC1EPK11OSMetaClass -__ZN15AppleIntelClockC1Ev -__ZN15AppleIntelClockC2EPK11OSMetaClass -__ZN15AppleIntelClockC2Ev -__ZN15AppleIntelClockD0Ev 
-__ZN15AppleIntelClockD2Ev -__ZNK15AppleIntelClock12getMetaClassEv -__ZNK15AppleIntelClock9MetaClass5allocEv -__ZTV15AppleIntelClock -__ZTVN15AppleIntelClock9MetaClassE -___divsi3 -___udivsi3 -__clts -__fldcw -__fnclex -__fninit -__fnstsw -__fprestore -__fpsave -__fstcw -__mp_disable_preemption -__mp_enable_preemption -__mp_enable_preemption_no_check -__setts -_a_dbl_fault -_a_fpu_over -_a_inv_tss -_acc_type _acpi_install_wake_handler _acpi_sleep_kernel -_act_machine_return -_all_intrs -_alltraps -_avail_end -_avail_start -_bbc_config -_bbc_gettime -_bbc_settime -_bcopy16 -_bcopy_no_overwrite -_bit_lock -_bit_lock_try -_bit_unlock -_blkclr -_boot_args_start -_check_io_fault -_clear_kdb_intr -_cli_count -_collect_ref -_collect_unref -_copyp2p -_copypv -_cpu_idle_handler -_cpu_interrupt _cpu_number -_cpu_shutdown _cpu_to_lapic -_cpu_vendors -_cpudata_desc_pattern -_cpuid_cpu_display -_cpuid_family -_cpuid_feature -_cpuid_feature_display _cpuid_features -_cpuid_get_feature_names -_cpuid_get_info _cpuid_info -_cpus_active -_cpus_idle -_createdt -_dectohexdec -_dev_indirect_count -_dev_indirect_list -_div_scale -_dr0 -_dr1 -_dr2 -_dr3 -_dr6 -_dr_addr -_dump_act -_dump_regs -_eintstack -_emulate_io -_fakePPCBootArgs -_fakePPCDeviceTree -_fc_get -_fix_desc -_flush_tlb -_fp_free -_fp_kind -_fp_load -_fp_save -_fp_state_alloc -_fpexterrflt -_fpextovrflt -_fpflush -_fpinit -_fpintr -_fpnoextflt -_fpu_get_fxstate -_fpu_get_state -_fpu_module_init -_fpu_set_fxstate -_fpu_set_state -_gDriversProp -_gMemoryMapNode -_gdt -_gdtptr -_gdtr -_get_cr0 -_get_cr2 -_get_cr3 -_get_cr4 -_get_ldt -_get_pc -_get_tr -_hexdectodec -_htonl -_htons -_i386_astintr -_i386_exception -_i386_init -_i386_preinit -_i386_signal_cpu -_i386_signal_cpus -_i386_vm_init -_i_bit_clear -_i_bit_set -_idt -_idtptr -_ifps_zone -_indent -_init_fpu -_insb -_insl -_inst_fetch -_insw -_intel_read_fault -_intel_startCPU -_inuse_ptepages_count -_iopb_destroy -_iopb_init -_jail -_kd_slmscd -_kd_slmscu -_kd_slmwd 
-_kdp_copy_kmem -_kdp_getstate -_kdp_i386_backtrace -_kdp_i386_trap -_kdp_setstate _kdreboot -_kernel_preempt_check -_kernel_trap -_ktss _lapic_end_of_interrupt _lapic_smm_restore -_last_addr -_ldt -_ldt_desc_pattern -_linb -_linl -_linw -_locore_end -_loutb -_loutl -_loutw -_mach25_syscall -_mach_rpc -_machdep_call_count -_machdep_call_table -_machdep_syscall -_master_cpu -_master_is_up -_master_processor -_master_up -_minsecurity _ml_get_max_cpus -_mp_boot_pde -_mp_kdp_enter -_mp_kdp_exit -_mp_kdp_lock -_mp_kdp_ncpus -_mp_kdp_trap +_mp_rendezvous_no_intrs _mtrr_range_add _mtrr_range_remove -_mul_scale -_nptr -_ntohl -_ntohs -_outsb -_outsl -_outsw -_panic_trap -_phys_attribute_clear -_phys_attribute_set -_phys_attribute_test -_pmap_alloc_chunk -_pmap_cache_count -_pmap_cache_list -_pmap_cache_lock -_pmap_cache_max -_pmap_copy_part_lpage -_pmap_copy_part_rpage -_pmap_debug -_pmap_expand -_pmap_map_bd -_pmap_movepage -_pmap_phys_attributes -_pmap_pte -_pmap_remove_range -_pmap_set_modify -_pmap_system_lock -_pmap_update_interrupt -_pmap_valid_page -_printdt -_process_pmap_updates -_pstart -_pv_free_list -_pv_free_list_lock -_pv_head_table -_pv_list_zone -_pv_lock_table -_real_to_prot -_recover_table -_recover_table_end -_remote_kdb -_reset_mem_on_reboot -_retry_table -_retry_table_end -_return_to_iret +_pmsCPUSetPStateLimit +_pmsCPULoadVIDTable _rtc_clock_stepped _rtc_clock_stepping -_rtc_cyc_per_sec -_rtcget -_rtclock -_rtcput -_sectOBJCB -_sectSizeOBJC -_serial_getc -_serial_init -_set_cpu_model -_set_cr0 -_set_cr3 -_set_cr4 -_set_kbd_leds -_set_ldt -_set_tr -_signal_cpus -_slave_boot_base -_slave_boot_end -_slave_boot_init -_slave_pstart -_slave_start -_smp_init _smp_initialized -_start_lock -_startprog -_sti_count -_t_bounds -_t_debug -_t_fpu_err -_t_gen_prot -_t_int3 -_t_into -_t_invop -_t_nofpu -_t_page_fault -_t_preempt -_t_segnp -_t_stack_fault -_t_trap_0f -_t_trap_11 -_t_trap_12 -_t_trap_13 -_t_trap_14 -_t_trap_15 -_t_trap_16 -_t_trap_17 -_t_trap_18 
-_t_trap_19 -_t_trap_1a -_t_trap_1b -_t_trap_1c -_t_trap_1d -_t_trap_1e -_t_trap_1f -_t_zero_div -_tc_clear_screen -_tc_enable -_tc_hide_cursor -_tc_initialize -_tc_paint_char -_tc_scroll_down -_tc_scroll_up -_tc_show_cursor -_tc_update_color -_thread_bind -_thread_compose_cthread_desc -_thread_fast_set_cthread_self -_thread_get_cthread_self -_thread_set_cthread_self -_trap_mach25_syscall -_trap_machdep_syscall -_trap_unix_syscall -_tss_desc_pattern -_user_ldt_free -_user_page_fault_continue -_user_trap -_v86_assist -_v86_assist_on -_v86_do_sti_cli -_v86_do_sti_immediate -_v86_unsafe_ok -_virtual_avail -_virtual_end -_vm_first_phys -_vm_last_phys -_yeartoday diff --git a/config/System6.0.ppc.exports b/config/System6.0.ppc.exports index 16e230227..3b367efcc 100644 --- a/config/System6.0.ppc.exports +++ b/config/System6.0.ppc.exports @@ -6,6 +6,8 @@ _PE_read_write_time_of_day _PE_write_IIC _PPCcalls _ResetHandler +_OSEnqueueAtomic +_OSDequeueAtomic __Z11IODBDMAStopPV23IODBDMAChannelRegisters __Z12IODBDMAFlushPV23IODBDMAChannelRegisters __Z12IODBDMAPausePV23IODBDMAChannelRegisters @@ -149,6 +151,38 @@ __ZN8AppleNMIC2EPK11OSMetaClass __ZN8AppleNMIC2Ev __ZN8AppleNMID0Ev __ZN8AppleNMID2Ev +__ZN8OSObject19_RESERVEDOSObject16Ev +__ZN8OSObject19_RESERVEDOSObject17Ev +__ZN8OSObject19_RESERVEDOSObject18Ev +__ZN8OSObject19_RESERVEDOSObject19Ev +__ZN8OSObject19_RESERVEDOSObject20Ev +__ZN8OSObject19_RESERVEDOSObject21Ev +__ZN8OSObject19_RESERVEDOSObject22Ev +__ZN8OSObject19_RESERVEDOSObject23Ev +__ZN8OSObject19_RESERVEDOSObject24Ev +__ZN8OSObject19_RESERVEDOSObject25Ev +__ZN8OSObject19_RESERVEDOSObject26Ev +__ZN8OSObject19_RESERVEDOSObject27Ev +__ZN8OSObject19_RESERVEDOSObject28Ev +__ZN8OSObject19_RESERVEDOSObject29Ev +__ZN8OSObject19_RESERVEDOSObject30Ev +__ZN8OSObject19_RESERVEDOSObject31Ev +__ZN9IOService20_RESERVEDIOService48Ev +__ZN9IOService20_RESERVEDIOService49Ev +__ZN9IOService20_RESERVEDIOService50Ev +__ZN9IOService20_RESERVEDIOService51Ev 
+__ZN9IOService20_RESERVEDIOService52Ev +__ZN9IOService20_RESERVEDIOService53Ev +__ZN9IOService20_RESERVEDIOService54Ev +__ZN9IOService20_RESERVEDIOService55Ev +__ZN9IOService20_RESERVEDIOService56Ev +__ZN9IOService20_RESERVEDIOService57Ev +__ZN9IOService20_RESERVEDIOService58Ev +__ZN9IOService20_RESERVEDIOService59Ev +__ZN9IOService20_RESERVEDIOService60Ev +__ZN9IOService20_RESERVEDIOService61Ev +__ZN9IOService20_RESERVEDIOService62Ev +__ZN9IOService20_RESERVEDIOService63Ev __ZNK10AppleMacIO12getMetaClassEv __ZNK10AppleMacIO14compareNubNameEPK9IOServiceP8OSStringPS4_ __ZNK10AppleMacIO9MetaClass5allocEv @@ -208,8 +242,3 @@ _ml_mem_backoff _pe_do_clock_test _pe_run_clock_test _scc -_pmsStart -_pmsPark -_pmsRun -_pmsRunLocal -_pmsBuild diff --git a/config/Unsupported.exports b/config/Unsupported.exports index 816e4e058..06b4312eb 100644 --- a/config/Unsupported.exports +++ b/config/Unsupported.exports @@ -8,18 +8,39 @@ _KUNCUserNotificationDisplayNotice _MD5Final _MD5Init _MD5Update +__ZN15IOWatchDogTimer10gMetaClassE +__ZN15IOWatchDogTimer10superClassE +__ZN15IOWatchDogTimer13setPropertiesEP8OSObject +__ZN15IOWatchDogTimer25_RESERVEDIOWatchDogTimer0Ev +__ZN15IOWatchDogTimer25_RESERVEDIOWatchDogTimer1Ev +__ZN15IOWatchDogTimer25_RESERVEDIOWatchDogTimer2Ev +__ZN15IOWatchDogTimer25_RESERVEDIOWatchDogTimer3Ev +__ZN15IOWatchDogTimer4stopEP9IOService +__ZN15IOWatchDogTimer5startEP9IOService +__ZN15IOWatchDogTimer9MetaClassC1Ev +__ZN15IOWatchDogTimer9MetaClassC2Ev +__ZN15IOWatchDogTimer9metaClassE +__ZN15IOWatchDogTimerC1EPK11OSMetaClass +__ZN15IOWatchDogTimerC2EPK11OSMetaClass +__ZN15IOWatchDogTimerD0Ev +__ZN15IOWatchDogTimerD2Ev __ZN16IOPlatformDevice10gMetaClassE __ZN16IOPlatformDevice26_RESERVEDIOPlatformDevice0Ev __ZN16IOPlatformDevice26_RESERVEDIOPlatformDevice1Ev __ZN16IOPlatformDevice26_RESERVEDIOPlatformDevice2Ev __ZN16IOPlatformDevice26_RESERVEDIOPlatformDevice3Ev -__ZN16IOPlatformDeviceC2EPK11OSMetaClass __ZN16IOPlatformDevice9metaClassE 
+__ZN16IOPlatformDeviceC2EPK11OSMetaClass __ZN16IOPlatformDeviceD2Ev __ZN18IODTPlatformExpert9metaClassE +__ZNK15IOWatchDogTimer12getMetaClassEv +__ZNK15IOWatchDogTimer9MetaClass5allocEv +__ZTV15IOWatchDogTimer __ZTV16IOPlatformDevice +__ZTVN15IOWatchDogTimer9MetaClassE __doprnt _aes_decrypt_cbc +_aes_decrypt_key _aes_decrypt_key128 _aes_encrypt_cbc _aes_encrypt_key128 @@ -130,6 +151,8 @@ _m_pullup _m_split _m_trailingspace _mach_make_memory_entry_64 +_mach_memory_entry_page_op +_mach_memory_entry_range_op _max_mem _mcl_to_paddr _mem_size @@ -203,6 +226,7 @@ _soconnect _socreate _sodisconnect _sofree +_sofreelastref _soisconnected _soisconnecting _soisdisconnected @@ -220,6 +244,8 @@ _stack_privilege _task_resume _task_suspend _tcbinfo +_temp_patch_ptrace +_temp_unpatch_ptrace _termioschars _thread_call_func _thread_call_func_cancel @@ -259,8 +285,8 @@ _vm_map_wire _vm_protect _vm_region _vm_region_object_create -_vnode_getparent _vnode_getname +_vnode_getparent _vnode_putname _vnode_tag _vnode_update_identity diff --git a/config/Unsupported.i386.exports b/config/Unsupported.i386.exports index 72a1f46cf..9099033c0 100644 --- a/config/Unsupported.i386.exports +++ b/config/Unsupported.i386.exports @@ -1,8 +1,18 @@ -_copypv -_cpu_number -_master_cpu -_master_processor -_ml_get_max_cpus -_mtrr_range_add -_mtrr_range_remove -_thread_bind +_cpu_data_ptr +_dsmos_page_transform_hook +_gPEEFISystemTable +_hpet_get_info +_io_map_spec +_lapic_start +_ml_get_apicid +_ml_get_maxbusdelay +_ml_get_maxsnoop +_ml_hpet_cfg +_mp_rendezvous +_pmRegister +_pm_init_lock +_rdHPET +_real_ncpus +_rtc_clock_napped +_tmrCvt +_tsc_get_info diff --git a/config/Unsupported.ppc.exports b/config/Unsupported.ppc.exports index 1f785c1ee..350464074 100644 --- a/config/Unsupported.ppc.exports +++ b/config/Unsupported.ppc.exports @@ -21,8 +21,6 @@ _ml_ppc_sleep _ml_set_processor_speed _ml_set_processor_voltage _ml_throttle -_temp_patch_ptrace -_temp_unpatch_ptrace _pmsStart _pmsPark _pmsRun diff --git 
a/iokit/Drivers/platform/drvAppleNMI/AppleNMI.cpp b/iokit/Drivers/platform/drvAppleNMI/AppleNMI.cpp index 74680f724..c3e51b319 100644 --- a/iokit/Drivers/platform/drvAppleNMI/AppleNMI.cpp +++ b/iokit/Drivers/platform/drvAppleNMI/AppleNMI.cpp @@ -67,7 +67,9 @@ bool AppleNMI::start(IOService *provider) addNotification( gIOPublishNotification, serviceMatching("IOPMrootDomain"), (IOServiceNotificationHandler)RootRegistered, this, 0 ); // Register the interrupt. - provider->registerInterrupt(0, this, (IOInterruptAction) &AppleNMI::handleInterrupt, 0); + IOInterruptAction handler = OSMemberFunctionCast(IOInterruptAction, + this, &AppleNMI::handleInterrupt); + provider->registerInterrupt(0, this, handler, 0); provider->enableInterrupt(0); return true; @@ -119,24 +121,24 @@ IOReturn AppleNMI::powerStateWillChangeTo ( IOPMPowerFlags theFlags, unsigned lo { // Mask NMI and change from edge to level whilst sleeping (copied directly from OS9 code) nmiIntSourceAddr = (volatile unsigned long *)kExtInt9_NMIIntSource; - nmiIntSource = ml_phys_read(nmiIntSourceAddr); + nmiIntSource = ml_phys_read((vm_address_t)nmiIntSourceAddr); nmiIntSource |= kNMIIntLevelMask; - ml_phys_write(nmiIntSourceAddr, nmiIntSource); + ml_phys_write((vm_address_t)nmiIntSourceAddr, nmiIntSource); eieio(); nmiIntSource |= kNMIIntMask; - ml_phys_write(nmiIntSourceAddr, nmiIntSource); + ml_phys_write((vm_address_t)nmiIntSourceAddr, nmiIntSource); eieio(); } else { // Unmask NMI and change back to edge (copied directly from OS9 code) nmiIntSourceAddr = (volatile unsigned long *)kExtInt9_NMIIntSource; - nmiIntSource = ml_phys_read(nmiIntSourceAddr); + nmiIntSource = ml_phys_read((vm_address_t)nmiIntSourceAddr); nmiIntSource &= ~kNMIIntLevelMask; - ml_phys_write(nmiIntSourceAddr, nmiIntSource); + ml_phys_write((vm_address_t)nmiIntSourceAddr, nmiIntSource); eieio(); nmiIntSource &= ~kNMIIntMask; - ml_phys_write(nmiIntSourceAddr, nmiIntSource); + ml_phys_write((vm_address_t)nmiIntSourceAddr, nmiIntSource); 
eieio(); } } diff --git a/iokit/IOKit/IOBufferMemoryDescriptor.h b/iokit/IOKit/IOBufferMemoryDescriptor.h index afe2c1794..f976759aa 100644 --- a/iokit/IOKit/IOBufferMemoryDescriptor.h +++ b/iokit/IOKit/IOBufferMemoryDescriptor.h @@ -69,9 +69,19 @@ class IOBufferMemoryDescriptor : public IOGeneralMemoryDescriptor vm_size_t capacity, vm_offset_t alignment, task_t inTask); - OSMetaClassDeclareReservedUsed(IOBufferMemoryDescriptor, 0); + +#if !(defined(__ppc__) && defined(KPI_10_4_0_PPC_COMPAT)) + virtual bool initWithPhysicalMask( + task_t inTask, + IOOptionBits options, + mach_vm_size_t capacity, + mach_vm_address_t alignment, + mach_vm_address_t physicalMask); + OSMetaClassDeclareReservedUsed(IOBufferMemoryDescriptor, 1); +#else OSMetaClassDeclareReservedUnused(IOBufferMemoryDescriptor, 1); +#endif OSMetaClassDeclareReservedUnused(IOBufferMemoryDescriptor, 2); OSMetaClassDeclareReservedUnused(IOBufferMemoryDescriptor, 3); OSMetaClassDeclareReservedUnused(IOBufferMemoryDescriptor, 4); @@ -144,6 +154,7 @@ class IOBufferMemoryDescriptor : public IOGeneralMemoryDescriptor @discussion Added in Mac OS X 10.2, this method allocates a memory buffer with a given size and alignment in the task's address space specified, and returns a memory descriptor instance representing the memory. It is recommended that memory allocated for I/O or sharing via mapping be created via IOBufferMemoryDescriptor. Options passed with the request specify the kind of memory to be allocated - pageablity and sharing are specified with option bits. This function may block and so should not be called from interrupt level or while a simple lock is held. @param inTask The task the buffer will be allocated in. @param options Options for the allocation:
+ kIODirectionOut, kIODirectionIn - set the direction of the I/O transfer.
kIOMemoryPhysicallyContiguous - pass to request memory be physically contiguous. This option is heavily discouraged. The request may fail if memory is fragmented, may cause large amounts of paging activity, and may take a very long time to execute.
kIOMemoryPageable - pass to request memory be non-wired - the default for kernel allocated memory is wired.
kIOMemoryPurgeable - pass to request memory that may later have its purgeable state set with IOMemoryDescriptor::setPurgeable. Only supported for kIOMemoryPageable allocations.
@@ -158,6 +169,26 @@ class IOBufferMemoryDescriptor : public IOGeneralMemoryDescriptor vm_size_t capacity, vm_offset_t alignment = 1); +#if !(defined(__ppc__) && defined(KPI_10_4_0_PPC_COMPAT)) +/*! @function inTaskWithPhysicalMask + @abstract Creates a memory buffer with memory descriptor for that buffer. + @discussion Added in Mac OS X 10.5, this method allocates a memory buffer with a given size and alignment in the specified task's address space, and returns a memory descriptor instance representing the memory. It is recommended that memory allocated for I/O or sharing via mapping be created via IOBufferMemoryDescriptor. Options passed with the request specify the kind of memory to be allocated - pageability and sharing are specified with option bits. This function may block and so should not be called from interrupt level or while a simple lock is held. + @param inTask The task the buffer will be mapped in. Pass NULL to create memory unmapped in any task (e.g. for use as a DMA buffer). + @param options Options for the allocation:
+ kIODirectionOut, kIODirectionIn - set the direction of the I/O transfer.
+ kIOMemoryPhysicallyContiguous - pass to request memory be physically contiguous. This option is heavily discouraged. The request may fail if memory is fragmented, may cause large amounts of paging activity, and may take a very long time to execute.
+ kIOMemoryKernelUserShared - pass to request memory that will be mapped into both the kernel and client applications. + @param capacity The number of bytes to allocate. + @param physicalMask The buffer will be allocated with pages such that physical addresses will only have bits set that are present in physicalMask. For example, pass 0x00000000FFFFFFFFULL for a buffer to be accessed by hardware that has 32 address bits. + @result Returns an instance of class IOBufferMemoryDescriptor to be released by the caller, which will free the memory descriptor and associated buffer. */ + + static IOBufferMemoryDescriptor * inTaskWithPhysicalMask( + task_t inTask, + IOOptionBits options, + mach_vm_size_t capacity, + mach_vm_address_t physicalMask); +#endif + /* * withCapacity: * @@ -242,6 +273,11 @@ class IOBufferMemoryDescriptor : public IOGeneralMemoryDescriptor * will not copy past the end of the memory descriptor's current capacity. */ virtual bool appendBytes(const void *bytes, vm_size_t withLength); + +#if !(defined(__ppc__) && defined(KPI_10_4_0_PPC_COMPAT)) + /* DEPRECATED */ virtual void * getVirtualSegment(IOByteCount offset, + /* DEPRECATED */ IOByteCount * length); +#endif }; #endif /* !_IOBUFFERMEMORYDESCRIPTOR_H */ diff --git a/iokit/IOKit/IOCommandGate.h b/iokit/IOKit/IOCommandGate.h index 1bb549d8c..5ac54a891 100644 --- a/iokit/IOKit/IOCommandGate.h +++ b/iokit/IOKit/IOCommandGate.h @@ -48,7 +48,7 @@ check if the hardware is active, if so it will add the request to a pending queue internal to the device or the device's family. Otherwise if the hardware is inactive then this request can be acted upon immediately.

- CAUTION: The runAction and runCommand functions can not be called from an interrupt context. + CAUTION: The runAction and runCommand functions can not be called from an interrupt context. But attemptCommand can, though it may return an error */ class IOCommandGate : public IOEventSource @@ -110,6 +110,12 @@ compiler warning. Defaults to zero, see $link IOEventSource::setAction. @result True if inherited classes initialise successfully. */ virtual bool init(OSObject *owner, Action action = 0); +#if !(defined(__ppc__) && defined(KPI_10_4_0_PPC_COMPAT)) + // Superclass overrides + virtual void free(); + virtual void setWorkLoop(IOWorkLoop *inWorkLoop); +#endif + /*! @function runCommand @abstract Single thread a command with the target work-loop. @discussion Client function that causes the current action to be called in @@ -117,13 +123,12 @@ a single threaded manner. Beware the work-loop's gate is recursive and command gates can cause direct or indirect re-entrancy. When the executing on a client's thread runCommand will sleep until the work-loop's gate opens for execution of client actions, the action is single threaded against all other -work-loop event sources. +work-loop event sources. If the command is disabled the attempt to run a command will be stalled until enable is called. @param arg0 Parameter for action of command gate, defaults to 0. @param arg1 Parameter for action of command gate, defaults to 0. @param arg2 Parameter for action of command gate, defaults to 0. @param arg3 Parameter for action of command gate, defaults to 0. - @result kIOReturnSuccess if successful. kIOReturnNotPermitted if this -event source is currently disabled, kIOReturnNoResources if no action available. + @result kIOReturnSuccess if successful. kIOReturnAborted if a disabled command gate is free()ed before being reenabled, kIOReturnNoResources if no action available. 
*/ virtual IOReturn runCommand(void *arg0 = 0, void *arg1 = 0, void *arg2 = 0, void *arg3 = 0); @@ -135,13 +140,13 @@ a single threaded manner. Beware the work-loop's gate is recursive and command gates can cause direct or indirect re-entrancy. When the executing on a client's thread runAction will sleep until the work-loop's gate opens for execution of client actions, the action is single threaded against all other -work-loop event sources. +work-loop event sources. If the command is disabled the attempt to run a command will be stalled until enable is called. @param action Pointer to function to be executed in work-loop context. @param arg0 Parameter for action parameter, defaults to 0. @param arg1 Parameter for action parameter, defaults to 0. @param arg2 Parameter for action parameter, defaults to 0. @param arg3 Parameter for action parameter, defaults to 0. - @result kIOReturnSuccess if successful. kIOReturnBadArgument if action is not defined, kIOReturnNotPermitted if this event source is currently disabled. + @result kIOReturnSuccess if successful. kIOReturnBadArgument if action is not defined, kIOReturnAborted if a disabled command gate is free()ed before being reenabled. */ virtual IOReturn runAction(Action action, void *arg0 = 0, void *arg1 = 0, @@ -150,9 +155,7 @@ work-loop event sources. /*! @function attemptCommand @abstract Single thread a command with the target work-loop. @discussion Client function that causes the current action to be called in -a single threaded manner. Beware the work-loop's gate is recursive and command -gates can cause direct or indirect re-entrancy. When the executing on a -client's thread attemptCommand will fail if the work-loop's gate is open. +a single threaded manner. When the executing on a client's thread attemptCommand will fail if the work-loop's gate is closed. @param arg0 Parameter for action of command gate, defaults to 0. @param arg1 Parameter for action of command gate, defaults to 0. 
@param arg2 Parameter for action of command gate, defaults to 0. @@ -167,7 +170,7 @@ client's thread attemptCommand will fail if the work-loop's gate is open. @discussion Client function that causes the given action to be called in a single threaded manner. Beware the work-loop's gate is recursive and command gates can cause direct or indirect re-entrancy. When the executing on a -client's thread attemptCommand will fail if the work-loop's gate is open. +client's thread attemptCommand will fail if the work-loop's gate is closed. @param action Pointer to function to be executed in work-loop context. @param arg0 Parameter for action parameter, defaults to 0. @param arg1 Parameter for action parameter, defaults to 0. @@ -195,6 +198,18 @@ client's thread attemptCommand will fail if the work-loop's gate is open. @param onlyOneThread true to only wake up at most one thread, false otherwise. */ virtual void commandWakeup(void *event, bool oneThread = false); +#if !(defined(__ppc__) && defined(KPI_10_4_0_PPC_COMPAT)) +/*! @function disable + @abstract Disable the command gate + @discussion When a command gate is disabled all future calls to runAction and runCommand will stall until the gate is enable()d later. This can be used to block client threads when a system sleep is requested. The IOWorkLoop thread itself will never stall, even when making runAction/runCommand calls. This call must be made from a gated context, to clear potential race conditions. */ + virtual void disable(); + +/*! @function enable + @abstract Enable command gate, this will unblock any blocked Commands and Actions. + @discussion Enable the command gate. The attemptAction/attemptCommand calls will now be enabled and can succeeed. Stalled runCommand/runAction calls will be woken up. 
*/ + virtual void enable(); +#endif + private: OSMetaClassDeclareReservedUnused(IOCommandGate, 0); OSMetaClassDeclareReservedUnused(IOCommandGate, 1); diff --git a/iokit/IOKit/IODMACommand.h b/iokit/IOKit/IODMACommand.h new file mode 100644 index 000000000..02eed40ed --- /dev/null +++ b/iokit/IOKit/IODMACommand.h @@ -0,0 +1,468 @@ +/* + * Copyright (c) 2005 Apple Computer, Inc. All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * The contents of this file constitute Original Code as defined in and + * are subject to the Apple Public Source License Version 1.1 (the + * "License"). You may not use this file except in compliance with the + * License. Please obtain a copy of the License at + * http://www.apple.com/publicsource and read it before using this file. + * + * This Original Code and all software distributed under the License are + * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the + * License for the specific language governing rights and limitations + * under the License. + * + * @APPLE_LICENSE_HEADER_END@ + */ +#ifndef _IODMACOMMAND_H +#define _IODMACOMMAND_H + +#include +#include +class IOMapper; + +/**************************** class IODMACommand ***************************/ + +/*! + @class IODMACommand + @abstract A mechanism to convert memory references to I/O bus addresses. + @discussion The IODMACommand is supersedes the IOMemoryCursor and greatly enhances the functionality and power of it. The command can be specified to output 64 bit physical addresses and also allows driver writers bypass mapping hardware or get addresses suitable for non-snooped DMA. +

+ The command is designed to be very easily subclassable. Most driver writers need to associate some DMA operations with their memory descriptor and usually use a C structure for that purpose. This structure is often kept in a linked list. The IODMACommand has built-in linkage and can be derived from, with 'public:' variables added, giving the developer a structure that can associate a memory descriptor with a particular DMA command but will also allow the developer to generate that command and keep the state necessary for tracking it. +

+ It is envisaged that a pool of IODMACommands will be created at driver initialisation and each command will be kept in an IOCommandPool while not in use. However, if developers wish to maintain their own free lists that is certainly possible. See kern/queue.h and xnu/iokit/Kernel/IOCommandPool for sample code on manipulating the command's doubly linked list entries. +

+ The IODMACommand can be used in a 'weak-linked' manner. To do this you must avoid using any static member functions. Use the, much slower but safe, weakWithSpecification function. On success a dma command instance will be returned. This instance can then be used to clone as many commands as is needed. Remember deriving from this class can not be done weakly, that is no weak subclassing! +*/ + +class IODMACommand : public IOCommand +{ + OSDeclareDefaultStructors(IODMACommand); + +public: + +/*! + @typedef Segment32 + @discussion A 32 bit I/O bus address/length pair +*/ + struct Segment32 { + UInt32 fIOVMAddr, fLength; + }; + +/*! + @typedef Segment64 + @discussion A 64 bit I/O bus address/length pair +*/ + struct Segment64 { + UInt64 fIOVMAddr, fLength; + }; + +/*! @enum MappingOptions + @abstract Mapping types to indicate the desired mapper type for translating memory descriptors into I/O DMA Bus addresses. + @constant kNonCoherent Used by drivers for non-coherent transfers, implies unmapped memmory + @constant kMapped Allow a driver to define addressing size + @constant kBypassed Allow drivers to bypass any mapper + @constant kMaxMappingOptions Internal use only +*/ + enum MappingOptions { + kMapped = 0x00000000, + kBypassed = 0x00000001, + kNonCoherent = 0x00000002, + kTypeMask = 0x0000000f, + + kNoCacheStore = 0x00000010, // Memory in descriptor + kOnChip = 0x00000020, // Indicates DMA is on South Bridge + kIterateOnly = 0x00000040 // DMACommand will be used as a cursor only + }; + +/*! @enum SynchronizeOptions + @abstract Options for the synchronize method. + @constant kForceDoubleBuffer Copy the entire prepared range to a new page aligned buffer. +*/ + enum SynchronizeOptions { + kForceDoubleBuffer = 0x01000000 + }; + +/*! + @typedef SegmentFunction + @discussion Pointer to a C function that translates a 64 segment and outputs a single desired segment to the array at the requested index. 
There are a group of pre-implemented SegmentFunctions that may be usefull to the developer below. + @param segment The 64Bit I/O bus address and length. + @param segments Base of the output vector of DMA address length pairs. + @param segmentIndex Index to output 'segment' in the 'segments' array. + @result Returns true if segment encoding succeeded. false may be returned if the current segment does not fit in an output segment, i.e. a 38bit address wont fit into a 32 encoding. +*/ + typedef bool (*SegmentFunction)(IODMACommand *target, + Segment64 segment, + void *segments, + UInt32 segmentIndex); + + // -------------- Preimplemented output functions ---------------- + +/*! @function OutputHost32 + @abstract Output host natural Segment32 output segment function. +*/ + static bool OutputHost32(IODMACommand *target, + Segment64 seg, void *segs, UInt32 ind); + +/*! @defined kIODMACommandOutputHost32 + @abstract Output host natural Segment32 output segment function. + */ +#define kIODMACommandOutputHost32 (IODMACommand::OutputHost32) + +/*! @function OutputBig32 + @abstract Output big-endian Segment32 output segment function. +*/ + static bool OutputBig32(IODMACommand *target, + Segment64 seg, void *segs, UInt32 ind); + +/*! @defined kIODMACommandOutputBig32 + @abstract Output big-endian Segment32 output segment function. + */ +#define kIODMACommandOutputBig32 (IODMACommand::OutputBig32) + +/*! @function OutputLittle32 + @abstract Output little-endian Segment32 output segment function. +*/ + static bool OutputLittle32(IODMACommand *target, + Segment64 seg, void *segs, UInt32 ind); + +/*! @defined kIODMACommandOutputLittle32 + @abstract Output little-endian Segment32 output segment function. +*/ +#define kIODMACommandOutputLittle32 (IODMACommand::OutputLittle32) + +/*! @function OutputHost64 + @abstract Output host natural Segment64 output segment function. +*/ + static bool OutputHost64(IODMACommand *target, + Segment64 seg, void *segs, UInt32 ind); + +/*! 
@defined kIODMACommandOutputHost64 + @abstract Output host natural Segment64 output segment function. +*/ +#define kIODMACommandOutputHost64 (IODMACommand::OutputHost64) + +/*! @function OutputBig64 + @abstract Output big-endian Segment64 output segment function. +*/ + static bool OutputBig64(IODMACommand *target, + Segment64 seg, void *segs, UInt32 ind); + +/*! @defined kIODMACommandOutputBig64 + @abstract Output big-endian Segment64 output segment function. +*/ +#define kIODMACommandOutputBig64 (IODMACommand::OutputBig64) + +/*! @function OutputLittle64 + @abstract Output little-endian Segment64 output segment function. +*/ + static bool OutputLittle64(IODMACommand *target, + Segment64 seg, void *segs, UInt32 ind); + +/*! @defined kIODMACommandOutputLittle64 + @abstract Output little-endian Segment64 output segment function. +*/ +#define kIODMACommandOutputLittle64 (IODMACommand::OutputLittle64) + +/*! @function withSpecification + @abstract Creates and initializes an IODMACommand in one operation. + @discussion Factory function to create and initialize an IODMACommand in one operation. + @param outSegFunc SegmentFunction to call to output one physical segment. A set of nine commonly required segment functions are provided. + @param numAddressBits Number of bits that the hardware uses on its internal address bus. Typically 32 but may be more on modern hardware. A 0 implies no-restriction other than that implied by the output segment function. + @param maxSegmentSize Maximum allowable size for one segment. If 0 is passed the maximum segment size is unlimited. + @param mappingOptions is the type of mapping that is required to translate an IOMemoryDescriptor into the desired number of bits. For instance if your hardware only supports 32 bits but must run on machines with > 4G of RAM some mapping will be required.
Number of bits will be specified in numAddressBits, see below.This parameter can take 3 values:- kNonCoherent - used for non-coherent hardware transfers, Mapped - Validate that all I/O bus generated addresses are within the number of addressing bits specified, Bypassed indicates that bypassed addressing is required, this is used when the hardware transferes are into coherent memory but no mapping is required. See also prepare() for failure cases. + @param maxTransferSize Maximum size of an entire transfer. Defaults to 0 indicating no maximum. + @param alignment Alignment restriction, in bytes, on I/O bus addresses. Defaults to single byte alignment. + @param mapper For mapping types kMapped & kBypassed mapper is used to define the hardware that will perform the mapping, defaults to the system mapper. + @result Returns a new memory cursor if successfully created and initialized, 0 otherwise. +*/ + static IODMACommand * + withSpecification(SegmentFunction outSegFunc, + UInt8 numAddressBits, + UInt64 maxSegmentSize, + MappingOptions mappingOptions = kMapped, + UInt64 maxTransferSize = 0, + UInt32 alignment = 1, + IOMapper *mapper = 0, + void *refCon = 0); + +/*! @function weakWithSpecification + @abstract Creates and initialises an IODMACommand in one operation if this version of the operating system supports it. + @discussion Factory function to create and initialise an IODMACommand in one operation. The function allows a developer to 'weak' link with IODMACommand. This function will return kIOReturnUnsupported if the IODMACommand is unavailable. This function is actually fairly slow so it will be better to call it once then clone the successfully create command using cloneCommand (q.v.). + @param newCommand Output reference variable of the newly created IODMACommand. + @param outSegFunc SegmentFunction to call to output one physical segment. A set of nine commonly required segment functions are provided. 
+ @param numAddressBits Number of bits that the hardware uses on its internal address bus. Typically 32 but may be more on modern hardware. A 0 implies no-restriction other than that implied by the output segment function. + @param maxSegmentSize Maximum allowable size for one segment. Zero is treated as an unlimited segment size. + @param mapType is the type of mapping that is required to translate an IOMemoryDescriptor into the desired number of bits. For instance if your hardware only supports 32 bits but must run on machines with > 4G of RAM some mapping will be required. Number of bits will be specified in numAddressBits, see below. This parameter can take 3 values:- kNonCoherent - used for non-coherent hardware transfers, Mapped - Validate that all I/O bus generated addresses are within the number of addressing bits specified, Bypassed indicates that bypassed addressing is required, this is used when the hardware transfers are into coherent memory but no mapping is required. See also prepare() for failure cases. + @param maxTransferSize Maximum size of an entire transfer. Defaults to 0 indicating no maximum. + @param alignment Alignment restriction, in bytes, on I/O bus addresses. Defaults to single byte alignment. + @param mapper For mapping types kMapped & kBypassed mapper is used to define the hardware that will perform the mapping, defaults to the system mapper. + @result kIOReturnSuccess if everything is OK, otherwise kIOReturnBadArgument if newCommand is NULL, kIOReturnUnsupported if the kernel doesn't export IODMACommand or IOReturnError if the new command fails to init, q.v. initWithSpecification. +*/ + // Note that the function has the attribute always_inline. + // The point of this function is to make a call into the kernel + // without generating an undefined symbol. If the client could call + // the code as a function then the goal of no undefined symbols + // would be lost thus defeating the purpose. 
+ static inline IOReturn weakWithSpecification + (IODMACommand **newCommand, + SegmentFunction outSegFunc, + UInt8 numAddressBits, + UInt64 maxSegmentSize, + MappingOptions mapType = kMapped, + UInt64 maxTransferSize = 0, + UInt32 alignment = 1, + IOMapper *mapper = 0, + void *refCon = 0) __attribute__((always_inline)); + +/*! + @function cloneCommand + @abstract Creates a new command based on the specification of the current one. + @discussion Factory function to create and initialise an IODMACommand in one operation. The current command's specification will be duplicated in the new object, but however none of its state will be duplicated. This means that it is safe to clone a command even if it is currently active and running, however you must be certain that the command to be duplicated does have a valid reference for the duration. + @result Returns a new memory cursor if successfully created and initialised, 0 otherwise. +*/ + virtual IODMACommand *cloneCommand(void *refCon = 0); + +/*! @function initWithSpecification + @abstract Primary initializer for the IODMACommand class. + @param outSegFunc SegmentFunction to call to output one physical segment. A set of nine commonly required segment functions are provided. + @param numAddressBits Number of bits that the hardware uses on its internal address bus. Typically 32 but may be more on modern hardware. A 0 implies no-restriction other than that implied by the output segment function. + @param maxSegmentSize Maximum allowable size for one segment. Defaults to 0 which means any size. + @param mappingOptions is the type of mapping that is required to translate an IOMemoryDescriptor into the desired number of bits. For instance if your hardware only supports 32 bits but must run on machines with > 4G of RAM some mapping will be required. 
Number of bits will be specified in numAddressBits, see below.This parameter can take 3 values:- kNonCoherent - used for non-coherent hardware transfers, Mapped - Validate that all I/O bus generated addresses are within the number of addressing bits specified, Bypassed indicates that bypassed addressing is required, this is used when the hardware transferes are into coherent memory but no mapping is required. See also prepare() for failure cases. + @param maxTransferSize Maximum size of an entire transfer. Defaults to 0 indicating no maximum. + @param alignment Alignment restriction, in bytes, on I/O bus addresses. Defaults to single byte alignment. + @param mapper For mapping types kMapped & kBypassed mapper is used to define the hardware that will perform the mapping, defaults to the system mapper. + @result Can fail if the mapping type is not recognised, if one of the 3 mandatory parameters are set to 0, if a 32 bit output function is selected when more than 32 bits of address is required or, if kBypassed is requested on a machine that doesn't support bypassing. Returns true otherwise. +*/ + virtual bool initWithSpecification( SegmentFunction outSegFunc, + UInt8 numAddressBits, + UInt64 maxSegmentSize, + MappingOptions mappingOptions = kMapped, + UInt64 maxTransferSize = 0, + UInt32 alignment = 1, + IOMapper *mapper = 0, + void *refCon = 0); + +/*! @function setMemoryDescriptor + @abstract Sets and resets the DMACommand's current memory descriptor + @discussion The DMA command will configure itself based on the information that it finds in the memory descriptor. It looks for things like the direction of the memory descriptor and whether the current memory descriptor is already mapped into some IOMMU. As a programmer convenience it can also prepare the memory descriptor immediately. See prepare(). Note the IODMACommand is designed to used multiple times with a succession of memory descriptors, making the pooling of commands possible. 
It is an error though to attempt to reset a currently prepared() DMA command. Warning: This routine may block so never try to autoprepare an IODMACommand while in a gated context, i.e. one of the WorkLoops action call outs. + @param mem A pointer to the current I/Os memory descriptor. + @param autoPrepare An optional boolean variable that will call the prepare() function automatically after the memory descriptor is processed. Defaults to true. + @result Returns kIOReturnSuccess, kIOReturnBusy if currently prepared, kIOReturnNoSpace if the length(mem) >= Maximum Transfer Size or the error codes returned by prepare() (qv). +*/ + virtual IOReturn setMemoryDescriptor(const IOMemoryDescriptor *mem, + bool autoPrepare = true); + +/*! @function clearMemoryDescriptor + @abstract Clears the DMACommand's current memory descriptor + @discussion completes and invalidates the cache if the DMA command is currently active, copies all data from bounce buffers if necessary and releases all resources acquired during setMemoryDescriptor. + @param autoComplete An optional boolean variable that will call the complete() function automatically before the memory descriptor is processed. Defaults to true. +*/ + virtual IOReturn clearMemoryDescriptor(bool autoComplete = true); + +/*! @function getMemoryDescriptor + @abstract Get the current memory descriptor +*/ + virtual const IOMemoryDescriptor *getMemoryDescriptor() const; + +/*! @function prepare + @abstract Prepare the memory for an I/O transfer. + @discussion Allocate the mapping resources neccessary for this transfer, specifying a sub range of the IOMemoryDescriptor that will be the target of the I/O. The complete() method frees these resources. Data may be copied to buffers for kIODirectionOut memory descriptors, depending on hardware mapping resource availabilty or alignment restrictions. 
It should be noted that the this function may block and should only be called on the clients context, i.e never call this routine while gated; also the call itself is not thread safe though this should be an issue as each IODMACommand is independant. + @param offset defines the starting offset in the memory descriptor the DMA command will operate on. genIOVMSegments will produce its results based on the offset and length passed to the prepare method. + @param length defines the ending position in the memory descriptor the DMA command will operate on. genIOVMSegments will produce its results based on the offset and length passed to the prepare method. + @param flushCache Flush the caches for the memory descriptor and make certain that the memory cycles are complete. Defaults to true for kNonCoherent and is ignored by the other types. + @param synchronize Copy any buffered data back from the target IOMemoryDescriptor. Defaults to true, if synchronize() is being used to explicitly copy data, passing false may avoid an unneeded copy. + @result An IOReturn code. */ + + virtual IOReturn prepare(UInt64 offset = 0, UInt64 length = 0, bool flushCache = true, bool synchronize = true); + +/*! @function complete + @abstract Complete processing of DMA mappings after an I/O transfer is finished. + @discussion This method should not be called unless a prepare was previously issued; the prepare() and complete() must occur in pairs, before and after an I/O transfer + @param invalidCache Invalidate the caches for the memory descriptor. Defaults to true for kNonCoherent and is ignored by the other types. + @param synchronize Copy any buffered data back to the target IOMemoryDescriptor. Defaults to true, if synchronize() is being used to explicitly copy data, passing false may avoid an unneeded copy. + @result kIOReturnNotReady if not prepared, kIOReturnSuccess otherwise. */ + + virtual IOReturn complete(bool invalidateCache = true, bool synchronize = true); + +/*! 
@function synchronize + @abstract Bring IOMemoryDescriptor and IODMACommand buffers into sync. + @discussion This method should not be called unless a prepare was previously issued. If needed a caller may synchronize any IODMACommand buffers with the original IOMemoryDescriptor buffers. + @param options Specifies the direction of the copy: + kIODirectionOut copy IOMemoryDesciptor memory to any IODMACommand buffers. By default this action takes place automatically at prepare(). + kIODirectionIn copy any IODMACommand buffers back to the IOMemoryDescriptor. By default this action takes place automatically at complete(). + kForceDoubleBuffer copy the entire prepared range to a new page aligned buffer. + @result kIOReturnNotReady if not prepared, kIOReturnBadArgument if invalid options are passed, kIOReturnSuccess otherwise. */ + + virtual IOReturn synchronize(IOOptionBits options); + +/*! @function genIOVMSegments + @abstract Generates a physical scatter/gather for the current DMA command + @discussion Generates a list of physical segments from the given memory descriptor, relative to the current position of the descriptor. The constraints that are set during initialisation will be respected. This function maintains the state across multiple calls for efficiency. However the state is discarded if the new offset is not the expected one. + @param offset input/output parameter, defines the starting and ending offset in the memory descriptor, relative to any offset passed to the prepare() method. + @param segments Void pointer to base of output physical scatter/gather list. Always passed directly onto the SegmentFunction. + @param numSegments Input/output parameter Number of segments that can fit in the segment array and returns number of segments generated. 
+ @result kIOReturnSuccess on success, kIOReturnOverrun if the memory descriptor is exhausted, kIOReturnMessageTooLarge if the output segment function's address bits has insufficient resolution for a segment, kIOReturnNotReady if the DMA command has not be prepared, kIOReturnBadArgument if the DMA command doesn't have a memory descriptor yet or some of the parameters are NULL and kIOReturnNotReady if the DMA command is not prepared. +*/ + virtual IOReturn genIOVMSegments(UInt64 *offset, + void *segments, + UInt32 *numSegments); + +/*! @function gen32IOVMSegments + @abstract Helper function for a type checked call to genIOVMSegments(qv), for use with an IODMACommand set up with the output function kIODMACommandOutputHost32, kIODMACommandOutputBig32, or kIODMACommandOutputLittle32. If the output function of the IODMACommand is not a 32 bit function, results will be incorrect. +*/ + inline IOReturn gen32IOVMSegments(UInt64 *offset, + Segment32 *segments, + UInt32 *numSegments) + { return genIOVMSegments(offset, segments, numSegments); }; + +/*! @function gen64IOVMSegments + @abstract Helper function for a type checked call to genIOVMSegments(qv), for use with an IODMACommand set up with the output function kIODMACommandOutputHost64, kIODMACommandOutputBig64, or kIODMACommandOutputLittle64. If the output function of the IODMACommand is not a 64 bit function, results will be incorrect. 
+*/ + inline IOReturn gen64IOVMSegments(UInt64 *offset, + Segment64 *segments, + UInt32 *numSegments) + { return genIOVMSegments(offset, segments, numSegments); }; + + virtual void free(); + +private: + + typedef IOReturn (*InternalSegmentFunction)( + void *reference, + IODMACommand *target, + Segment64 segment, + void *segments, + UInt32 segmentIndex); + + IOReturn genIOVMSegments(InternalSegmentFunction outSegFunc, + void *reference, + UInt64 *offsetP, + void *segmentsP, + UInt32 *numSegmentsP); + + static IOReturn clientOutputSegment( + void *reference, IODMACommand *target, + Segment64 segment, void *vSegList, UInt32 outSegIndex); + + static IOReturn segmentOp( + void *reference, + IODMACommand *target, + Segment64 segment, + void *segments, + UInt32 segmentIndex); + IOReturn IODMACommand::walkAll(UInt8 op); + +private: + OSMetaClassDeclareReservedUnused(IODMACommand, 0); + OSMetaClassDeclareReservedUnused(IODMACommand, 1); + OSMetaClassDeclareReservedUnused(IODMACommand, 2); + OSMetaClassDeclareReservedUnused(IODMACommand, 3); + OSMetaClassDeclareReservedUnused(IODMACommand, 4); + OSMetaClassDeclareReservedUnused(IODMACommand, 5); + OSMetaClassDeclareReservedUnused(IODMACommand, 6); + OSMetaClassDeclareReservedUnused(IODMACommand, 7); + OSMetaClassDeclareReservedUnused(IODMACommand, 8); + OSMetaClassDeclareReservedUnused(IODMACommand, 9); + OSMetaClassDeclareReservedUnused(IODMACommand, 10); + OSMetaClassDeclareReservedUnused(IODMACommand, 11); + OSMetaClassDeclareReservedUnused(IODMACommand, 12); + OSMetaClassDeclareReservedUnused(IODMACommand, 13); + OSMetaClassDeclareReservedUnused(IODMACommand, 14); + OSMetaClassDeclareReservedUnused(IODMACommand, 15); + +public: +/*! @var fRefCon Reference Constant, client defined publicly avialable */ + void *fRefCon; + +protected: + +/*! @var fMaxSegmentSize Maximum size of one segment in a scatter/gather list */ + UInt64 fMaxSegmentSize; + +/*! 
@var fMaxTransferSize + Maximum size of a transfer that this memory cursor is allowed to generate */ + UInt64 fMaxTransferSize; + +/*! @var fBypassMask + Mask to be ored into the address to bypass the given iommu's mapping. */ + UInt64 fBypassMask; + +/*! @var fMapper + Client defined mapper. */ + IOMapper *fMapper; + +/*! @var fMemory + memory descriptor for current I/O. */ + const IOMemoryDescriptor *fMemory; + +/*! @var fOutSeg The SegmentFunction called to output each generated segment */ + SegmentFunction fOutSeg; + +/*! @var fAlignMask + Alignment restriction mask. */ + UInt32 fAlignMask; + +/*! @var fNumAddressBits + Number of bits that the hardware can address */ + UInt32 fNumAddressBits; + +/*! @var fNumSegments + Number of contiguous segments required for the current memory descriptor and desired mapping */ + UInt32 fNumSegments; + +/*! @var fMappingOptions + What type of I/O virtual address mapping is required for this command */ + MappingOptions fMappingOptions; + +/*! @var fActive + fActive indicates that this DMA command is currently prepared and ready to go */ + UInt32 fActive; + +/*! @var reserved + Reserved for future use.
(Internal use only) */ + struct ExpansionData * reserved; +}; + +IOReturn IODMACommand:: +weakWithSpecification(IODMACommand **newCommand, + SegmentFunction outSegFunc, + UInt8 numAddressBits, + UInt64 maxSegmentSize, + MappingOptions mapType, + UInt64 maxTransferSize, + UInt32 alignment, + IOMapper *mapper, + void *refCon) +{ + if (!newCommand) + return kIOReturnBadArgument; + + IODMACommand *self = (IODMACommand *) + OSMetaClass::allocClassWithName("IODMACommand"); + if (!self) + return kIOReturnUnsupported; + + IOReturn ret; + bool inited = self-> + initWithSpecification(outSegFunc, + numAddressBits, maxSegmentSize, mapType, + maxTransferSize, alignment, mapper, refCon); + if (inited) + ret = kIOReturnSuccess; + else { + self->release(); + self = 0; + ret = kIOReturnError; + } + + *newCommand = self; + return ret; +}; +#endif /* !_IODMACOMMAND_H */ + diff --git a/iokit/IOKit/IOHibernatePrivate.h b/iokit/IOKit/IOHibernatePrivate.h index 01e3b860c..7f737f689 100644 --- a/iokit/IOKit/IOHibernatePrivate.h +++ b/iokit/IOKit/IOHibernatePrivate.h @@ -65,7 +65,10 @@ struct IOHibernateImageHeader uint32_t signature; uint32_t processorFlags; - uint8_t reserved2[24]; + uint32_t runtimePages; + uint32_t runtimePageCount; + + uint8_t reserved2[16]; uint64_t encryptStart; uint64_t machineSignature; @@ -75,7 +78,13 @@ struct IOHibernateImageHeader uint32_t diag[4]; - uint32_t reserved[82]; // make sizeof == 512 + int32_t graphicsInfoOffset; + int32_t cryptVarsOffset; + int32_t memoryMapOffset; + uint32_t memoryMapSize; + uint32_t systemTableOffset; + + uint32_t reserved[77]; // make sizeof == 512 uint32_t fileExtentMapSize; IOPolledFileExtent fileExtentMap[2]; @@ -225,15 +234,25 @@ hibernate_vm_lock(void); void hibernate_vm_unlock(void); +// mark pages not to be saved, based on VM system accounting void hibernate_page_list_setall(hibernate_page_list_t * page_list, hibernate_page_list_t * page_list_wired, uint32_t * pagesOut); +// mark pages to be saved, or pages not to be 
saved but available +// for scratch usage during restore void hibernate_page_list_setall_machine(hibernate_page_list_t * page_list, hibernate_page_list_t * page_list_wired, uint32_t * pagesOut); + +// mark pages not to be saved and not for scratch usage during restore +void +hibernate_page_list_set_volatile( hibernate_page_list_t * page_list, + hibernate_page_list_t * page_list_wired, + uint32_t * pagesOut); + void hibernate_page_list_discard(hibernate_page_list_t * page_list); @@ -243,11 +262,15 @@ hibernate_set_page_state(hibernate_page_list_t * page_list, hibernate_page_list_ void hibernate_page_bitset(hibernate_page_list_t * list, boolean_t set, uint32_t page); + boolean_t hibernate_page_bittst(hibernate_page_list_t * list, uint32_t page); +hibernate_bitmap_t * +hibernate_page_bitmap_pin(hibernate_page_list_t * list, uint32_t * page); + uint32_t -hibernate_page_list_count(hibernate_page_list_t *list, uint32_t set, uint32_t page); +hibernate_page_bitmap_count(hibernate_bitmap_t * bitmap, uint32_t set, uint32_t page); void hibernate_restore_phys_page(uint64_t src, uint64_t dst, uint32_t len, uint32_t procFlags); @@ -261,6 +284,10 @@ long hibernate_machine_entrypoint(IOHibernateImageHeader * header, void * p2, void * p3, void * p4); long hibernate_kernel_entrypoint(IOHibernateImageHeader * header, void * p2, void * p3, void * p4); +void +hibernate_newruntime_map(void * map, vm_size_t map_size, + uint32_t system_table_offset); + extern uint32_t gIOHibernateState; extern uint32_t gIOHibernateMode; @@ -330,6 +357,8 @@ enum #define kIOHibernateMemorySignatureEnvKey "mem-sig" #define kIOHibernateMachineSignatureKey "machine-signature" +#define kIOHibernateRTCVariablesKey "IOHibernateRTCVariables" + #ifdef __cplusplus } #endif diff --git a/iokit/IOKit/IOKitKeys.h b/iokit/IOKit/IOKitKeys.h index f92c4263d..11a17a23b 100644 --- a/iokit/IOKit/IOKitKeys.h +++ b/iokit/IOKit/IOKitKeys.h @@ -76,6 +76,10 @@ // IOService default user client class, for loadable user clients 
#define kIOUserClientClassKey "IOUserClientClass" +#define kIOUserClientCrossEndianKey "IOUserClientCrossEndian" +#define kIOUserClientCrossEndianCompatibleKey "IOUserClientCrossEndianCompatible" +#define kIOUserClientSharedInstanceKey "IOUserClientSharedInstance" + // IOService notification types #define kIOPublishNotification "IOServicePublish" #define kIOFirstPublishNotification "IOServiceFirstPublish" @@ -89,6 +93,8 @@ #define kIOAppPowerStateInterest "IOAppPowerStateInterest" #define kIOPriorityPowerStateInterest "IOPriorityPowerStateInterest" +#define kIOPlatformDeviceMessageKey "IOPlatformDeviceMessage" + // IOService interest notification types #define kIOCFPlugInTypesKey "IOCFPlugInTypes" @@ -96,14 +102,16 @@ #define kIOCommandPoolSizeKey "IOCommandPoolSize" // (OSNumber) // properties found in services that have transfer constraints -#define kIOMaximumBlockCountReadKey "IOMaximumBlockCountRead" // (OSNumber) -#define kIOMaximumBlockCountWriteKey "IOMaximumBlockCountWrite" // (OSNumber) -#define kIOMaximumByteCountReadKey "IOMaximumByteCountRead" // (OSNumber) -#define kIOMaximumByteCountWriteKey "IOMaximumByteCountWrite" // (OSNumber) -#define kIOMaximumSegmentCountReadKey "IOMaximumSegmentCountRead" // (OSNumber) -#define kIOMaximumSegmentCountWriteKey "IOMaximumSegmentCountWrite" // (OSNumber) -#define kIOMaximumSegmentByteCountReadKey "IOMaximumSegmentByteCountRead" // (OSNumber) -#define kIOMaximumSegmentByteCountWriteKey "IOMaximumSegmentByteCountWrite" // (OSNumber) +#define kIOMaximumBlockCountReadKey "IOMaximumBlockCountRead" // (OSNumber) +#define kIOMaximumBlockCountWriteKey "IOMaximumBlockCountWrite" // (OSNumber) +#define kIOMaximumByteCountReadKey "IOMaximumByteCountRead" // (OSNumber) +#define kIOMaximumByteCountWriteKey "IOMaximumByteCountWrite" // (OSNumber) +#define kIOMaximumSegmentCountReadKey "IOMaximumSegmentCountRead" // (OSNumber) +#define kIOMaximumSegmentCountWriteKey "IOMaximumSegmentCountWrite" // (OSNumber) +#define 
kIOMaximumSegmentByteCountReadKey "IOMaximumSegmentByteCountRead" // (OSNumber) +#define kIOMaximumSegmentByteCountWriteKey "IOMaximumSegmentByteCountWrite" // (OSNumber) +#define kIOMinimumSegmentAlignmentByteCountKey "IOMinimumSegmentAlignmentByteCount" // (OSNumber) +#define kIOMaximumSegmentAddressableBitCountKey "IOMaximumSegmentAddressableBitCount" // (OSNumber) // properties found in services that wish to describe an icon // diff --git a/iokit/IOKit/IOKitKeysPrivate.h b/iokit/IOKit/IOKitKeysPrivate.h index 0db1a4db5..222ef95ad 100644 --- a/iokit/IOKit/IOKitKeysPrivate.h +++ b/iokit/IOKit/IOKitKeysPrivate.h @@ -28,6 +28,7 @@ // properties found in the registry root #define kIOConsoleUsersKey "IOConsoleUsers" /* value is OSArray */ #define kIOMaximumMappedIOByteCountKey "IOMaximumMappedIOByteCount" /* value is OSNumber */ +#define kIOStartupMkextCRC "IOStartupMkextCRC" /* value is 32-bit OSNumber */ // properties found in the console user dict @@ -43,4 +44,8 @@ #define kIOKernelHasSafeSleep 1 +enum { kIOPrepareToPhys32 = 0x04 }; + +#define kIODirectionPrepareToPhys32 ((IODirection) kIOPrepareToPhys32) + #endif /* ! _IOKIT_IOKITKEYSPRIVATE_H */ diff --git a/iokit/IOKit/IOLib.h b/iokit/IOKit/IOLib.h index a792dc154..ccbf5554c 100644 --- a/iokit/IOKit/IOLib.h +++ b/iokit/IOKit/IOLib.h @@ -105,7 +105,7 @@ void IOFreeAligned(void * address, vm_size_t size); @discussion This is a utility to allocate memory in the kernel, with an alignment restriction which is specified as a byte count, and will allocate only physically contiguous memory. The request may fail if memory is fragmented, and may cause large amounts of paging activity. This function may block and so should not be called from interrupt level or while a simple lock is held. @param size Size of the memory requested. @param alignment Byte count of the alignment for the memory. For example, pass 256 to get memory allocated at an address with bits 0-7 zero. 
- @param physicalAddress IOMallocContiguous returns the physical address of the allocated memory here, if physicalAddress is a non-zero pointer. + @param physicalAddress IOMallocContiguous returns the physical address of the allocated memory here, if physicalAddress is a non-zero pointer. The physicalAddress argument is deprecated and should be passed as NULL. To obtain the physical address for a memory buffer, use the IODMACommand class in conjunction with the IOMemoryDescriptor or IOBufferMemoryDescriptor classes. @result Virtual address of the allocated memory, or zero on failure. */ void * IOMallocContiguous(vm_size_t size, vm_size_t alignment, @@ -259,7 +259,7 @@ IOThread IOCreateThread(IOThreadFunc function, void *argument); @abstract Terminate exceution of current thread. @discussion This function destroys the currently running thread, and does not return. */ -volatile void IOExitThread(void); +void IOExitThread(void) __dead2; /*! @function IOSleep @abstract Sleep the calling thread for a number of milliseconds. 
diff --git a/iokit/IOKit/IOMapper.h b/iokit/IOKit/IOMapper.h index 83bdfd570..6507f5d7e 100644 --- a/iokit/IOKit/IOMapper.h +++ b/iokit/IOKit/IOMapper.h @@ -38,6 +38,11 @@ void IOMapperInsertPPNPages(ppnum_t addr, unsigned offset, ppnum_t *pageList, unsigned pageCount); void IOMapperInsertUPLPages(ppnum_t addr, unsigned offset, upl_page_info_t *pageList, unsigned pageCount); + +mach_vm_address_t IOMallocPhysical(mach_vm_size_t size, mach_vm_address_t mask); + +void IOFreePhysical(mach_vm_address_t address, mach_vm_size_t size); + __END_DECLS #if __cplusplus @@ -68,8 +73,6 @@ class IOMapper : public IOService OSData *fTableHandle; bool fIsSystem; - virtual bool start(IOService *provider); - virtual void free(); static void setMapperRequired(bool hasMapper); static void waitForSystemMapper(); @@ -79,6 +82,11 @@ class IOMapper : public IOService virtual bool allocTable(IOByteCount size); public: +#if !(defined(__ppc__) && defined(KPI_10_4_0_PPC_COMPAT)) + virtual bool start(IOService *provider); + virtual void free(); +#endif + // Static routines capable of allocating tables that are physically // contiguous in real memory space. static OSData * NewARTTable(IOByteCount size, @@ -104,8 +112,16 @@ class IOMapper : public IOService // iovm mapping. 
virtual addr64_t mapAddr(IOPhysicalAddress addr) = 0; +#if !(defined(__ppc__) && defined(KPI_10_4_0_PPC_COMPAT)) + // Get the address mask to or into an address to bypass this mapper + virtual bool getBypassMask(addr64_t *maskP) const + OSMetaClassDeclareReservedUsed(IOMapper, 0); +#endif + private: +#if (defined(__ppc__) && defined(KPI_10_4_0_PPC_COMPAT)) OSMetaClassDeclareReservedUnused(IOMapper, 0); +#endif OSMetaClassDeclareReservedUnused(IOMapper, 1); OSMetaClassDeclareReservedUnused(IOMapper, 2); OSMetaClassDeclareReservedUnused(IOMapper, 3); diff --git a/iokit/IOKit/IOMemoryCursor.h b/iokit/IOKit/IOMemoryCursor.h index bb4fdb3a6..28a0bbcd4 100644 --- a/iokit/IOKit/IOMemoryCursor.h +++ b/iokit/IOKit/IOMemoryCursor.h @@ -206,7 +206,7 @@ class IONaturalMemoryCursor : public IOMemoryCursor @param fromPosition Starting location of the I/O within a memory descriptor. @param segments Pointer to an array of IOMemoryCursor::PhysicalSegments for the output physical scatter/gather list. @param maxSegments Maximum number of segments that can be written to segments array. - @param maxTransferSize Maximum transfer size is limited to that many bytes, otherwise it defaults to the maximum transfer size specified when the memory cursor was initialized. + @param inMaxTransferSize Maximum transfer size is limited to that many bytes, otherwise it defaults to the maximum transfer size specified when the memory cursor was initialized. @param transferSize Pointer to an IOByteCount variable that can contain the total size of the transfer being described. Defaults to 0 indicating that no transfer size need be returned. @result If the descriptor is exhausted of memory, a zero is returned, otherwise the number of segments that were filled in is returned. 
*/ @@ -214,11 +214,11 @@ class IONaturalMemoryCursor : public IOMemoryCursor IOByteCount fromPosition, PhysicalSegment *segments, UInt32 maxSegments, - UInt32 maxTransferSize = 0, + UInt32 inMaxTransferSize = 0, IOByteCount *transferSize = 0) { return genPhysicalSegments(descriptor, fromPosition, segments, - maxSegments, maxTransferSize, transferSize); + maxSegments, inMaxTransferSize, transferSize); } }; @@ -282,7 +282,7 @@ successfully. @param fromPosition Starting location of the I/O within a memory descriptor. @param segments Pointer to an array of IOMemoryCursor::PhysicalSegments for the output physical scatter/gather list. @param maxSegments Maximum number of segments that can be written to segments array. - @param maxTransferSize Maximum transfer size is limited to that many bytes, otherwise it defaults to the maximum transfer size specified when the memory cursor was initialized. + @param inMaxTransferSize Maximum transfer size is limited to that many bytes, otherwise it defaults to the maximum transfer size specified when the memory cursor was initialized. @param transferSize Pointer to an IOByteCount variable that can contain the total size of the transfer being described. Defaults to 0 indicating that no transfer size need be returned. @result If the descriptor is exhausted of memory, a zero is returned, otherwise the number of segments that were filled in is returned. */ @@ -290,11 +290,11 @@ successfully. IOByteCount fromPosition, PhysicalSegment * segments, UInt32 maxSegments, - UInt32 maxTransferSize = 0, + UInt32 inMaxTransferSize = 0, IOByteCount * transferSize = 0) { return genPhysicalSegments(descriptor, fromPosition, segments, - maxSegments, maxTransferSize, transferSize); + maxSegments, inMaxTransferSize, transferSize); } }; @@ -356,7 +356,7 @@ class IOLittleMemoryCursor : public IOMemoryCursor @param fromPosition Starting location of the I/O within a memory descriptor. 
@param segments Pointer to an array of IOMemoryCursor::PhysicalSegments for the output physical scatter/gather list. @param maxSegments Maximum number of segments that can be written to segments array. - @param maxTransferSize Maximum transfer size is limited to that many bytes, otherwise it defaults to the maximum transfer size specified when the memory cursor was initialized. + @param inMaxTransferSize Maximum transfer size is limited to that many bytes, otherwise it defaults to the maximum transfer size specified when the memory cursor was initialized. @param transferSize Pointer to an IOByteCount variable that can contain the total size of the transfer being described. Defaults to 0 indicating that no transfer size need be returned. @result If the descriptor is exhausted of memory, a zero is returned, otherwise the number of segments that were filled in is returned. */ @@ -364,11 +364,11 @@ class IOLittleMemoryCursor : public IOMemoryCursor IOByteCount fromPosition, PhysicalSegment * segments, UInt32 maxSegments, - UInt32 maxTransferSize = 0, + UInt32 inMaxTransferSize = 0, IOByteCount * transferSize = 0) { return genPhysicalSegments(descriptor, fromPosition, segments, - maxSegments, maxTransferSize, transferSize); + maxSegments, inMaxTransferSize, transferSize); } }; @@ -434,7 +434,7 @@ class IODBDMAMemoryCursor : public IOMemoryCursor @param fromPosition Starting location of the I/O within a memory descriptor. @param segments Pointer to an array of DBDMA descriptors for the output physical scatter/gather list. Be warned no room is left for a preamble in the output array. 'segments' should point to the first memory description slot in a DBDMA command. @param maxSegments Maximum number of segments that can be written to the DBDMA descriptor table. - @param maxTransferSize Maximum transfer size is limited to that many bytes, otherwise it defaults to the maximum transfer size specified when the memory cursor was initialized. 
+ @param inMaxTransferSize Maximum transfer size is limited to that many bytes, otherwise it defaults to the maximum transfer size specified when the memory cursor was initialized. @param transferSize Pointer to an IOByteCount variable that can contain the total size of the transfer being described. Defaults to 0 indicating that no transfer size need be returned. @result If the descriptor is exhausted of memory, a zero is returned, otherwise the number of segments that were filled in is returned. */ @@ -442,11 +442,11 @@ class IODBDMAMemoryCursor : public IOMemoryCursor IOByteCount fromPosition, IODBDMADescriptor * segments, UInt32 maxSegments, - UInt32 maxTransferSize = 0, + UInt32 inMaxTransferSize = 0, IOByteCount * transferSize = 0) { return genPhysicalSegments(descriptor, fromPosition, segments, - maxSegments, maxTransferSize, transferSize); + maxSegments, inMaxTransferSize, transferSize); } }; diff --git a/iokit/IOKit/IOMemoryDescriptor.h b/iokit/IOKit/IOMemoryDescriptor.h index c370718fb..9d37372fd 100644 --- a/iokit/IOKit/IOMemoryDescriptor.h +++ b/iokit/IOKit/IOMemoryDescriptor.h @@ -64,6 +64,8 @@ enum { kIOMemoryTypeUPL = 0x00000030, kIOMemoryTypePersistentMD = 0x00000040, // Persistent Memory Descriptor kIOMemoryTypeUIO = 0x00000050, + kIOMemoryTypeVirtual64 = 0x00000060, + kIOMemoryTypePhysical64 = 0x00000070, kIOMemoryTypeMask = 0x000000f0, kIOMemoryAsReference = 0x00000100, @@ -88,6 +90,9 @@ enum kIOMemoryIncoherentIOStore = 2, }; +#define IOMEMORYDESCRIPTOR_SUPPORTS_DMACOMMAND 1 + + /*! @class IOMemoryDescriptor : public OSObject @abstract An abstract base class defining common methods for describing physical or virtual memory. @discussion The IOMemoryDescriptor object represents a buffer or range of memory, specified as one or more physical or virtual address ranges. 
It contains methods to return the memory's physically contiguous segments (fragments), for use with the IOMemoryCursor, and methods to map the memory into any address space with caching and placed mapping options. */ @@ -124,7 +129,7 @@ class IOMemoryDescriptor : public OSObject IOOptionBits _tag; public: - +typedef IOOptionBits DMACommandOps; virtual IOPhysicalAddress getSourceSegment( IOByteCount offset, IOByteCount * length ); OSMetaClassDeclareReservedUsed(IOMemoryDescriptor, 0); @@ -138,7 +143,7 @@ class IOMemoryDescriptor : public OSObject UInt32 offset, task_t task, IOOptionBits options, - IOMapper * mapper = 0); + IOMapper * mapper = kIOMapperSystem); OSMetaClassDeclareReservedUsed(IOMemoryDescriptor, 1); virtual addr64_t getPhysicalSegment64( IOByteCount offset, @@ -178,9 +183,16 @@ class IOMemoryDescriptor : public OSObject IOByteCount offset, IOByteCount length ); OSMetaClassDeclareReservedUsed(IOMemoryDescriptor, 4); -private: +#if !(defined(__ppc__) && defined(KPI_10_4_0_PPC_COMPAT)) + // Used for dedicated communications for IODMACommand + virtual IOReturn dmaCommandOperation(DMACommandOps op, void *vData, UInt dataSize) const; + OSMetaClassDeclareReservedUsed(IOMemoryDescriptor, 5); +#endif +private: +#if (defined(__ppc__) && defined(KPI_10_4_0_PPC_COMPAT)) OSMetaClassDeclareReservedUnused(IOMemoryDescriptor, 5); +#endif OSMetaClassDeclareReservedUnused(IOMemoryDescriptor, 6); OSMetaClassDeclareReservedUnused(IOMemoryDescriptor, 7); OSMetaClassDeclareReservedUnused(IOMemoryDescriptor, 8); @@ -253,6 +265,43 @@ class IOMemoryDescriptor : public OSObject task_t withTask, bool asReference = false); +#if !(defined(__ppc__) && defined(KPI_10_4_0_PPC_COMPAT)) +/*! @function withAddressRange + @abstract Create an IOMemoryDescriptor to describe one virtual range of the specified map. + @discussion This method creates and initializes an IOMemoryDescriptor for memory consisting of a single virtual memory range mapped into the specified map. 
+ @param address The virtual address of the first byte in the memory. + @param withLength The length of memory. + @param options + kIOMemoryDirectionMask (options:direction) This nibble indicates the I/O direction to be associated with the descriptor, which may affect the operation of the prepare and complete methods on some architectures. + kIOMemoryNoAutoPrepare Indicates that the temporary AutoPrepare of kernel_task memory should not be performed. + @param task The task the virtual ranges are mapped into. + @result The created IOMemoryDescriptor on success, to be released by the caller, or zero on failure. */ + + static IOMemoryDescriptor * withAddressRange( + mach_vm_address_t address, + mach_vm_size_t length, + IOOptionBits options, + task_t task); + +/*! @function withAddressRanges + @abstract Create an IOMemoryDescriptor to describe one or more virtual ranges. + @discussion This method creates and initializes an IOMemoryDescriptor for memory consisting of an array of virtual memory ranges each mapped into a specified source task. + @param ranges An array of IOAddressRange structures which specify the virtual ranges in the specified map which make up the memory to be described. IOAddressRange is the 64bit version of IOVirtualRange. + @param rangeCount The member count of the ranges array. + @param options + kIOMemoryDirectionMask (options:direction) This nibble indicates the I/O direction to be associated with the descriptor, which may affect the operation of the prepare and complete methods on some architectures. + kIOMemoryAsReference For options:type = Virtual or Physical this indicate that the memory descriptor need not copy the ranges array into local memory. This is an optimisation to try to minimise unnecessary allocations. + kIOMemoryNoAutoPrepare Indicates that the temporary AutoPrepare of kernel_task memory should not be performed. + @param task The task each of the virtual ranges are mapped into. 
+ @result The created IOMemoryDescriptor on success, to be released by the caller, or zero on failure. */ + + static IOMemoryDescriptor * withAddressRanges( + IOAddressRange * ranges, + UInt32 rangeCount, + IOOptionBits options, + task_t withTask); +#endif + /*! @function withOptions @abstract Master initialiser for all variants of memory descriptors. @discussion This method creates and initializes an IOMemoryDescriptor for memory it has three main variants: Virtual, Physical & mach UPL. These variants are selected with the options parameter, see below. This memory descriptor needs to be prepared before it can be used to extract data from the memory described. However we temporarily have setup a mechanism that automatically prepares kernel_task memory descriptors at creation time. @@ -282,7 +331,7 @@ class IOMemoryDescriptor : public OSObject UInt32 offset, task_t task, IOOptionBits options, - IOMapper * mapper = 0); + IOMapper * mapper = kIOMapperSystem); /*! @function withPhysicalRanges @abstract Create an IOMemoryDescriptor to describe one or more physical ranges. @@ -676,8 +725,9 @@ class IOGeneralMemoryDescriptor : public IOMemoryDescriptor public: union Ranges { - IOVirtualRange * v; - IOPhysicalRange * p; + IOVirtualRange *v; + IOAddressRange *v64; + IOPhysicalRange *p; void *uio; }; protected: @@ -702,36 +752,28 @@ class IOGeneralMemoryDescriptor : public IOMemoryDescriptor virtual void free(); +#if !(defined(__ppc__) && defined(KPI_10_4_0_PPC_COMPAT)) + virtual IOReturn dmaCommandOperation(DMACommandOps op, void *vData, UInt dataSize) const; +#endif private: - // Internal APIs may be made virtual at some time in the future. 
- IOReturn wireVirtual(IODirection forDirection); - void *createNamedEntry(); - - /* DEPRECATED */ IOByteCount _position; /* absolute position over all ranges */ /* DEPRECATED */ virtual void setPosition(IOByteCount position); - -/* - * DEPRECATED IOByteCount _positionAtIndex; // relative position within range #n - * - * Re-use the _positionAtIndex as a count of the number of pages in - * this memory descriptor. Convieniently vm_address_t is an unsigned integer - * type so I can get away without having to change the type. - */ - unsigned int _pages; - -/* DEPRECATED */ unsigned _positionAtOffset; //range #n in which position is now - - OSData *_memoryEntries; - - /* DEPRECATED */ vm_offset_t _kernPtrAligned; - /* DEPRECATED */ unsigned _kernPtrAtIndex; - /* DEPRECATED */ IOByteCount _kernSize; - /* DEPRECATED */ virtual void mapIntoKernel(unsigned rangeIndex); /* DEPRECATED */ virtual void unmapFromKernel(); + // Internal APIs may be made virtual at some time in the future. + IOReturn wireVirtual(IODirection forDirection); + void *createNamedEntry(); + + // Internal + OSData * _memoryEntries; + unsigned int _pages; + ppnum_t _highestPage; + uint32_t __iomd_reservedA; + uint32_t __iomd_reservedB; + uint32_t __iomd_reservedC; + public: /* * IOMemoryDescriptor required methods @@ -743,7 +785,7 @@ class IOGeneralMemoryDescriptor : public IOMemoryDescriptor UInt32 offset, task_t task, IOOptionBits options, - IOMapper * mapper = 0); + IOMapper * mapper = kIOMapperSystem); // Secondary initialisers virtual bool initWithAddress(void * address, @@ -771,6 +813,11 @@ class IOGeneralMemoryDescriptor : public IOMemoryDescriptor IODirection withDirection, bool asReference = false); +#if !(defined(__ppc__) && defined(KPI_10_4_0_PPC_COMPAT)) + virtual addr64_t getPhysicalSegment64( IOByteCount offset, + IOByteCount * length ); +#endif + virtual IOPhysicalAddress getPhysicalSegment(IOByteCount offset, IOByteCount * length); @@ -847,6 +894,11 @@ class IOSubMemoryDescriptor : public 
IOMemoryDescriptor IOMemoryDescriptor::withRanges; IOMemoryDescriptor::withSubRange; +#if !(defined(__ppc__) && defined(KPI_10_4_0_PPC_COMPAT)) + // used by IODMACommand + virtual IOReturn dmaCommandOperation(DMACommandOps op, void *vData, UInt dataSize) const; +#endif + public: /* * Initialize or reinitialize an IOSubMemoryDescriptor to describe @@ -864,6 +916,11 @@ class IOSubMemoryDescriptor : public IOMemoryDescriptor * IOMemoryDescriptor required methods */ +#if !(defined(__ppc__) && defined(KPI_10_4_0_PPC_COMPAT)) + virtual addr64_t getPhysicalSegment64( IOByteCount offset, + IOByteCount * length ); +#endif + virtual IOPhysicalAddress getPhysicalSegment(IOByteCount offset, IOByteCount * length); diff --git a/iokit/IOKit/IOMultiMemoryDescriptor.h b/iokit/IOKit/IOMultiMemoryDescriptor.h index 1032842a6..190dca1da 100644 --- a/iokit/IOKit/IOMultiMemoryDescriptor.h +++ b/iokit/IOKit/IOMultiMemoryDescriptor.h @@ -120,6 +120,11 @@ class IOMultiMemoryDescriptor : public IOMemoryDescriptor virtual IOPhysicalAddress getPhysicalSegment( IOByteCount offset, IOByteCount * length ); +#if !(defined(__ppc__) && defined(KPI_10_4_0_PPC_COMPAT)) + virtual addr64_t getPhysicalSegment64( IOByteCount offset, + IOByteCount * length ); +#endif + /*! @function prepare @abstract Prepare the memory for an I/O transfer. @discussion This involves paging in the memory, if necessary, and wiring it down for the duration of the transfer. The complete() method completes the processing of the memory after the I/O transfer finishes. This method needn't called for non-pageable memory. 
diff --git a/iokit/IOKit/IOPlatformExpert.h b/iokit/IOKit/IOPlatformExpert.h index b6f6040c3..ff98488f7 100644 --- a/iokit/IOKit/IOPlatformExpert.h +++ b/iokit/IOKit/IOPlatformExpert.h @@ -46,7 +46,8 @@ extern int PEGetPlatformEpoch( void ); enum { kPEHaltCPU, kPERestartCPU, - kPEHangCPU + kPEHangCPU, + kPEUPSDelayHaltCPU }; extern int (*PE_halt_restart)(unsigned int type); extern int PEHaltRestart(unsigned int type); @@ -267,6 +268,9 @@ class IOPlatformExpertDevice : public IOService virtual void free(); + virtual bool attachToChild( IORegistryEntry * child, + const IORegistryPlane * plane ); + OSMetaClassDeclareReservedUnused(IOPlatformExpertDevice, 0); OSMetaClassDeclareReservedUnused(IOPlatformExpertDevice, 1); OSMetaClassDeclareReservedUnused(IOPlatformExpertDevice, 2); diff --git a/iokit/IOKit/IOReturn.h b/iokit/IOKit/IOReturn.h index 012585340..32f8ad690 100644 --- a/iokit/IOKit/IOReturn.h +++ b/iokit/IOKit/IOReturn.h @@ -46,8 +46,16 @@ typedef kern_return_t IOReturn; #define sub_iokit_firewire err_sub(2) #define sub_iokit_block_storage err_sub(4) #define sub_iokit_graphics err_sub(5) +#define sub_iokit_networking err_sub(6) #define sub_iokit_bluetooth err_sub(8) #define sub_iokit_pmu err_sub(9) +#define sub_iokit_acpi err_sub(10) +#define sub_iokit_smbus err_sub(11) +#define sub_iokit_ahci err_sub(12) +#define sub_iokit_powermanagement err_sub(13) +//#define sub_iokit_hidsystem err_sub(14) +//#define sub_iokit_pccard err_sub(21) + #define sub_iokit_vendor_specific err_sub(-2) #define sub_iokit_reserved err_sub(-1) diff --git a/iokit/IOKit/IOService.h b/iokit/IOKit/IOService.h index 71c47184e..39127f5ef 100644 --- a/iokit/IOKit/IOService.h +++ b/iokit/IOKit/IOService.h @@ -376,6 +376,8 @@ class IOService : public IORegistryEntry OSMetaClassDeclareReservedUnused(IOService, 45); OSMetaClassDeclareReservedUnused(IOService, 46); OSMetaClassDeclareReservedUnused(IOService, 47); + +#ifdef __ppc__ OSMetaClassDeclareReservedUnused(IOService, 48); 
OSMetaClassDeclareReservedUnused(IOService, 49); OSMetaClassDeclareReservedUnused(IOService, 50); @@ -392,6 +394,7 @@ class IOService : public IORegistryEntry OSMetaClassDeclareReservedUnused(IOService, 61); OSMetaClassDeclareReservedUnused(IOService, 62); OSMetaClassDeclareReservedUnused(IOService, 63); +#endif public: /*! @function getState @@ -974,7 +977,7 @@ class IOService : public IORegistryEntry /*! @function disableInterrupt @abstract Disable a device interrupt. - @discussion Disable a device interrupt. It is the callers responsiblity to keep track of the enable state of the interrupt source. + @discussion Synchronously disable a device interrupt. If the interrupt routine is running, the call will block until the routine completes. It is the caller's responsibility to keep track of the enable state of the interrupt source. @param source The index of the interrupt source in the device. @result An IOReturn code.
kIOReturnNoInterrupt is returned if the source is not valid. */ @@ -1158,11 +1161,16 @@ class IOService : public IORegistryEntry static void actionFinalize( IOService * victim, IOOptionBits options ); static void actionStop( IOService * client, IOService * provider ); - void PMfree( void ); - virtual IOReturn resolveInterrupt(IOService *nub, int source); virtual IOReturn lookupInterrupt(int source, bool resolve, IOInterruptController **interruptController); + // SPI to control CPU low power modes + void setCPUSnoopDelay(UInt32 ns); + UInt32 getCPUSnoopDelay(); + void requireMaxBusStall(UInt32 ns); + + void PMfree( void ); + /* power management */ /*! @function PMinit @@ -1856,6 +1864,7 @@ virtual IOReturn newTemperature ( long currentTemp, IOService * whichZone ); IOReturn allowCancelCommon ( void ); void computeDesiredState ( void ); void rebuildChildClampBits ( void ); + IOReturn temporaryMakeUsable ( void ); }; #endif /* ! _IOKIT_IOSERVICE_H */ diff --git a/iokit/IOKit/IOTimeStamp.h b/iokit/IOKit/IOTimeStamp.h index d6c42e12b..8e01e7e66 100644 --- a/iokit/IOKit/IOTimeStamp.h +++ b/iokit/IOKit/IOTimeStamp.h @@ -76,71 +76,81 @@ IOTimeStamp(unsigned int csc, #endif /* KDEBUG */ -#define IODBG_SCSI(code) (KDBG_CODE(DBG_IOKIT, DBG_IOSCSI, code)) -#define IODBG_DISK(code) (KDBG_CODE(DBG_IOKIT, DBG_IODISK, code)) -#define IODBG_NETWORK(code) (KDBG_CODE(DBG_IOKIT, DBG_IONETWORK, code)) -#define IODBG_KEYBOARD(code) (KDBG_CODE(DBG_IOKIT, DBG_IOKEYBOARD, code)) -#define IODBG_POINTING(code) (KDBG_CODE(DBG_IOKIT, DBG_IOPOINTING, code)) -#define IODBG_AUDIO(code) (KDBG_CODE(DBG_IOKIT, DBG_IOAUDIO, code)) -#define IODBG_FLOPPY(code) (KDBG_CODE(DBG_IOKIT, DBG_IOFLOPPY, code)) -#define IODBG_SERIAL(code) (KDBG_CODE(DBG_IOKIT, DBG_IOSERIAL, code)) -#define IODBG_TTY(code) (KDBG_CODE(DBG_IOKIT, DBG_IOTTY, code)) +#define IODBG_STORAGE(code) (KDBG_CODE(DBG_IOKIT, DBG_IOSTORAGE, code)) +#define IODBG_NETWORK(code) (KDBG_CODE(DBG_IOKIT, DBG_IONETWORK, code)) +#define 
IODBG_KEYBOARD(code) (KDBG_CODE(DBG_IOKIT, DBG_IOKEYBOARD, code)) +#define IODBG_HID(code) (KDBG_CODE(DBG_IOKIT, DBG_IOHID, code)) +#define IODBG_AUDIO(code) (KDBG_CODE(DBG_IOKIT, DBG_IOAUDIO, code)) +#define IODBG_SERIAL(code) (KDBG_CODE(DBG_IOKIT, DBG_IOSERIAL, code)) +#define IODBG_TTY(code) (KDBG_CODE(DBG_IOKIT, DBG_IOTTY, code)) +#define IODBG_SAM(code) (KDBG_CODE(DBG_IOKIT, DBG_IOSAM, code)) +#define IODBG_PARALLELATA(code) (KDBG_CODE(DBG_IOKIT, DBG_IOPARALLELATA, code)) +#define IODBG_PARALLELSCSI(code) (KDBG_CODE(DBG_IOKIT, DBG_IOPARALLELSCSI, code)) +#define IODBG_SATA(code) (KDBG_CODE(DBG_IOKIT, DBG_IOSATA, code)) +#define IODBG_SAS(code) (KDBG_CODE(DBG_IOKIT, DBG_IOSAS, code)) +#define IODBG_FIBRECHANNEL(code) (KDBG_CODE(DBG_IOKIT, DBG_IOFIBRECHANNEL, code)) +#define IODBG_USB(code) (KDBG_CODE(DBG_IOKIT, DBG_IOUSB, code)) +#define IODBG_BLUETOOTH(code) (KDBG_CODE(DBG_IOKIT, DBG_IOBLUETOOTH, code)) +#define IODBG_FIREWIRE(code) (KDBG_CODE(DBG_IOKIT, DBG_IOFIREWIRE, code)) +#define IODBG_INFINIBAND(code) (KDBG_CODE(DBG_IOKIT, DBG_IOINFINIBAND, code)) + + +/* Backwards compatibility */ +#define IODBG_DISK(code) IODBG_STORAGE(code) +#define IODBG_POINTING(code) IODBG_HID(code) + /* IOKit infrastructure subclasses */ -#define IODBG_WORKLOOP(code) (KDBG_CODE(DBG_IOKIT, DBG_IOWORKLOOP, code)) -#define IODBG_INTES(code) (KDBG_CODE(DBG_IOKIT, DBG_IOINTES, code)) -#define IODBG_TIMES(code) (KDBG_CODE(DBG_IOKIT, DBG_IOCLKES, code)) -#define IODBG_CMDQ(code) (KDBG_CODE(DBG_IOKIT, DBG_IOCMDQ, code)) -#define IODBG_MCURS(code) (KDBG_CODE(DBG_IOKIT, DBG_IOMCURS, code)) -#define IODBG_MDESC(code) (KDBG_CODE(DBG_IOKIT, DBG_IOMDESC, code)) -#define IODBG_POWER(code) (KDBG_CODE(DBG_IOKIT, DBG_IOPOWER, code)) +#define IODBG_WORKLOOP(code) (KDBG_CODE(DBG_IOKIT, DBG_IOWORKLOOP, code)) +#define IODBG_INTES(code) (KDBG_CODE(DBG_IOKIT, DBG_IOINTES, code)) +#define IODBG_TIMES(code) (KDBG_CODE(DBG_IOKIT, DBG_IOCLKES, code)) +#define IODBG_CMDQ(code) (KDBG_CODE(DBG_IOKIT, 
DBG_IOCMDQ, code)) +#define IODBG_MCURS(code) (KDBG_CODE(DBG_IOKIT, DBG_IOMCURS, code)) +#define IODBG_MDESC(code) (KDBG_CODE(DBG_IOKIT, DBG_IOMDESC, code)) +#define IODBG_POWER(code) (KDBG_CODE(DBG_IOKIT, DBG_IOPOWER, code)) /* IOKit specific codes - within each subclass */ -/* DBG_IOKIT/DBG_IOSCSI codes */ - /* DBG_IOKIT/DBG_IODISK codes */ /* DBG_IOKIT/DBG_IONETWORK codes */ /* DBG_IOKIT/DBG_IOKEYBOARD codes */ -/* DBG_IOKIT/DBG_IOPOINTING codes */ +/* DBG_IOKIT/DBG_IOHID codes */ /* DBG_IOKIT/DBG_IOAUDIO codes */ -/* DBG_IOKIT/DBG_IOFLOPPY codes */ - /* DBG_IOKIT/DBG_IOSERIAL codes */ /* DBG_IOKIT/DBG_IOTTY codes */ /* DBG_IOKIT/DBG_IOWORKLOOP codes */ -#define IOWL_CLIENT 1 /* 0x050a0004 */ -#define IOWL_WORK 2 /* 0x050a0008 */ +#define IOWL_CLIENT 1 /* 0x05010004 */ +#define IOWL_WORK 2 /* 0x05010008 */ /* DBG_IOKIT/DBG_IOINTES codes */ -#define IOINTES_CLIENT 1 /* 0x050b0004 */ -#define IOINTES_LAT 2 /* 0x050b0008 */ -#define IOINTES_SEMA 3 /* 0x050b000c */ -#define IOINTES_INTCTXT 4 /* 0x050b0010 */ -#define IOINTES_INTFLTR 5 /* 0x050b0014 */ -#define IOINTES_ACTION 6 /* 0x050b0018 */ -#define IOINTES_FILTER 7 /* 0x050b001c */ +#define IOINTES_CLIENT 1 /* 0x05020004 */ +#define IOINTES_LAT 2 /* 0x05020008 */ +#define IOINTES_SEMA 3 /* 0x0502000c */ +#define IOINTES_INTCTXT 4 /* 0x05020010 */ +#define IOINTES_INTFLTR 5 /* 0x05020014 */ +#define IOINTES_ACTION 6 /* 0x05020018 */ +#define IOINTES_FILTER 7 /* 0x0502001c */ /* DBG_IOKIT/DBG_IOTIMES codes */ -#define IOTIMES_CLIENT 1 /* 0x050c0004 */ -#define IOTIMES_LAT 2 /* 0x050c0008 */ -#define IOTIMES_SEMA 3 /* 0x050c000c */ -#define IOTIMES_ACTION 4 /* 0x050c0010 */ +#define IOTIMES_CLIENT 1 /* 0x05030004 */ +#define IOTIMES_LAT 2 /* 0x05030008 */ +#define IOTIMES_SEMA 3 /* 0x0503000c */ +#define IOTIMES_ACTION 4 /* 0x05030010 */ /* DBG_IOKIT/DBG_IOCMDQ codes */ -#define IOCMDQ_CLIENT 1 /* 0x050d0004 */ -#define IOCMDQ_LAT 2 /* 0x050d0008 */ -#define IOCMDQ_SEMA 3 /* 0x050d000c */ -#define IOCMDQ_PSEMA 4 /* 
0x050d0010 */ -#define IOCMDQ_PLOCK 5 /* 0x050d0014 */ -#define IOCMDQ_ACTION 6 /* 0x050d0018 */ +#define IOCMDQ_CLIENT 1 /* 0x05040004 */ +#define IOCMDQ_LAT 2 /* 0x05040008 */ +#define IOCMDQ_SEMA 3 /* 0x0504000c */ +#define IOCMDQ_PSEMA 4 /* 0x05040010 */ +#define IOCMDQ_PLOCK 5 /* 0x05040014 */ +#define IOCMDQ_ACTION 6 /* 0x05040018 */ /* DBG_IOKIT/DBG_IOMCURS codes */ diff --git a/iokit/IOKit/IOTypes.h b/iokit/IOKit/IOTypes.h index 417fe2020..4564ad3eb 100644 --- a/iokit/IOKit/IOTypes.h +++ b/iokit/IOKit/IOTypes.h @@ -56,20 +56,9 @@ extern "C" { #ifndef __MACTYPES__ /* CF MacTypes.h */ #ifndef __TYPES__ /* guess... Mac Types.h */ +#include <stdbool.h> #include <libkern/OSTypes.h> -#ifndef __cplusplus -#if !TYPE_BOOL -#ifdef KERNEL -typedef int bool; -enum { - false = 0, - true = 1 -}; -#endif -#endif -#endif - #endif /* __TYPES__ */ #endif /* __MACTYPES__ */ @@ -107,12 +96,23 @@ struct IOVirtualRange IOVirtualAddress address; IOByteCount length; }; +struct IOAddressRange +{ + mach_vm_address_t address; + mach_vm_size_t length; +}; #else typedef struct { IOVirtualAddress address; IOByteCount length; } IOVirtualRange; + +struct IOAddressRange +{ + mach_vm_address_t address; + mach_vm_size_t length; +}; #endif /* diff --git a/iokit/IOKit/IOUserClient.h b/iokit/IOKit/IOUserClient.h index d5c1d23d2..cbf9d4969 100644 --- a/iokit/IOKit/IOUserClient.h +++ b/iokit/IOKit/IOUserClient.h @@ -102,7 +102,10 @@ class IOUserClient : public IOService public: OSSet * mappings; - void * __reserved[8]; + UInt8 sharedInstance; + + UInt8 __reservedA[3]; + void * __reserved[7]; private: OSMetaClassDeclareReservedUnused(IOUserClient, 0); @@ -137,6 +140,10 @@ class IOUserClient : public IOService static IOReturn clientHasPrivilege( void * securityToken, const char * privilegeName ); +#if !(defined(__ppc__) && defined(KPI_10_4_0_PPC_COMPAT)) + virtual bool init(); + virtual bool init( OSDictionary * dictionary ); +#endif // Currently ignores the all args, just passes up to IOService::init() virtual bool initWithTask( 
task_t owningTask, void * securityToken, UInt32 type, diff --git a/iokit/IOKit/IOWorkLoop.h b/iokit/IOKit/IOWorkLoop.h index f71331864..0fa9251aa 100644 --- a/iokit/IOKit/IOWorkLoop.h +++ b/iokit/IOKit/IOWorkLoop.h @@ -148,7 +148,13 @@ member function's parameter list. virtual void free(); /*! @function threadMain - @discussion Work loop threads main function. This function consists of 3 loops: the outermost loop is the semaphore clear and wait loop, the middle loop terminates when there is no more work, and the inside loop walks the event list calling the checkForWork method in each event source. If an event source has more work to do, it can set the more flag and the middle loop will repeat. When no more work is outstanding the outermost will sleep until an event is signalled or the least wakeupTime, whichever occurs first. If the event source does not require the semaphore wait to time out, it must set the provided wakeupTime parameter to zero. + @discussion Work loop threads main function. This function consists of 3 + loops: the outermost loop is the semaphore clear and wait loop, the middle + loop terminates when there is no more work, and the inside loop walks the + event list calling the checkForWork method in each event source. If an + event source has more work to do, it can set the more flag and the middle + loop will repeat. When no more work is outstanding the outermost will + sleep until an event is signalled. */ virtual void threadMain(); @@ -231,7 +237,7 @@ member function's parameter list. virtual void openGate(); virtual void closeGate(); virtual bool tryCloseGate(); - virtual int sleepGate(void *event, UInt32 interuptibleType); + virtual int sleepGate(void *event, UInt32 interuptibleType); virtual void wakeupGate(void *event, bool oneThread); public: @@ -239,9 +245,7 @@ member function's parameter list. /*! @function runAction @abstract Single thread a call to an action with the work-loop. 
- @discussion Client function that causes the given action to be called in -a single threaded manner. Beware: the work-loop's gate is recursive and runAction can cause direct or indirect re-entrancy. When executing on a client's thread, runAction will sleep until the work-loop's gate opens for -execution of client actions, the action is single threaded against all other work-loop event sources. + @discussion Client function that causes the given action to be called in a single threaded manner. Beware: the work-loop's gate is recursive and runAction can cause direct or indirect re-entrancy. When executing on a client's thread, runAction will sleep until the work-loop's gate opens for execution of client actions, the action is single threaded against all other work-loop event sources. @param action Pointer to function to be executed in work-loop context. @param arg0 Parameter for action parameter, defaults to 0. @param arg1 Parameter for action parameter, defaults to 0. @@ -249,14 +253,40 @@ execution of client actions, the action is single threaded against all other wor @param arg3 Parameter for action parameter, defaults to 0. @result Returns the value of the Action callout. */ + OSMetaClassDeclareReservedUsed(IOWorkLoop, 0); virtual IOReturn runAction(Action action, OSObject *target, void *arg0 = 0, void *arg1 = 0, void *arg2 = 0, void *arg3 = 0); +#if !(defined(__ppc__) && defined(KPI_10_4_0_PPC_COMPAT)) +/*! @function runEventSources + @discussion Consists of the inner 2 loops of the threadMain function(qv). + The outer loop terminates when there is no more work, and the inside loop + walks the event list calling the checkForWork method in each event source. + If an event source has more work to do, it can set the more flag and the + outer loop will repeat. +

+ This function can be used to clear a priority inversion between the normal + workloop thread and multimedia's real-time threads. The problem is that + the interrupt action routine is often held off by high-priority threads. + So if they want to get their data now they will have to call us and ask if + any data is available. The multimedia user client will arrange for this + function to be called, which causes any pending interrupts to be processed + and the completion routines called. By the time the function returns all + outstanding work will have been completed at the real-time thread's + priority. + + @result Return false if the work loop is shutting down, true otherwise. +*/ + OSMetaClassDeclareReservedUsed(IOWorkLoop, 1); + virtual bool runEventSources(); +#endif + protected: - OSMetaClassDeclareReservedUsed(IOWorkLoop, 0); +#if (defined(__ppc__) && defined(KPI_10_4_0_PPC_COMPAT)) OSMetaClassDeclareReservedUnused(IOWorkLoop, 1); +#endif OSMetaClassDeclareReservedUnused(IOWorkLoop, 2); OSMetaClassDeclareReservedUnused(IOWorkLoop, 3); OSMetaClassDeclareReservedUnused(IOWorkLoop, 4); diff --git a/iokit/IOKit/assert.h b/iokit/IOKit/assert.h index 575395cc1..70c7361d3 100644 --- a/iokit/IOKit/assert.h +++ b/iokit/IOKit/assert.h @@ -22,6 +22,7 @@ #ifndef _IO_ASSERT_H_ #define _IO_ASSERT_H_ +#include #ifdef __cplusplus extern "C" { @@ -39,5 +40,11 @@ extern "C" { } #endif + +#if( !defined( OSCompileAssert ) ) +# define OSCompileAssert( TEST ) \ + extern int OSCompileAssertFailed[ ( TEST ) ? 
1 : -1 ] __unused; +#endif + #endif /* _IO_ASSERT_H_ */ diff --git a/iokit/IOKit/nvram/IONVRAMController.h b/iokit/IOKit/nvram/IONVRAMController.h index eccee0bc4..691d4a1d3 100644 --- a/iokit/IOKit/nvram/IONVRAMController.h +++ b/iokit/IOKit/nvram/IONVRAMController.h @@ -20,6 +20,9 @@ * @APPLE_LICENSE_HEADER_END@ */ +#ifndef _IOKIT_IONVRAMCONTROLLER_H +#define _IOKIT_IONVRAMCONTROLLER_H + #include class IONVRAMController: public IOService @@ -36,3 +39,5 @@ class IONVRAMController: public IOService virtual IOReturn write(IOByteCount offset, UInt8 *buffer, IOByteCount length) = 0; }; + +#endif /* !_IOKIT_IONVRAMCONTROLLER_H */ diff --git a/iokit/IOKit/pwr_mgt/IOPM.h b/iokit/IOKit/pwr_mgt/IOPM.h index 5f8dd4760..4d355012d 100644 --- a/iokit/IOKit/pwr_mgt/IOPM.h +++ b/iokit/IOKit/pwr_mgt/IOPM.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 1998-2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 1998-2005 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -22,246 +22,343 @@ #ifndef _IOKIT_IOPM_H #define _IOKIT_IOPM_H -#define IOPMMaxPowerStates 10 +#include +#include +#include + +#ifdef __ppc__ +#include +#endif + + + +enum { + kIOPMMaxPowerStates = 10, + IOPMMaxPowerStates = kIOPMMaxPowerStates +}; typedef unsigned long IOPMPowerFlags; enum { - // following bits are used in the input and output power fields - kIOPMClockNormal = 0x0004, - kIOPMClockRunning = 0x0008, - kIOPMAuxPowerOn = 0x0020, // used only between root and root parent - kIOPMPagingAvailable = 0x0020, // used only between paging plexus and its children - kIOPMPassThrough = 0x0100, - kIOPMDoze = 0x0400, - kIOPMSoftSleep = 0x0400, // old usage, replaced by kIOPMDoze - kIOPMSleep = 0x0001, - kIOPMRestart = 0x0080, - // following bits are used in the capabilites field and the power fields - kIOPMPowerOn = 0x0002, - kIOPMPreventSystemSleep = 0x0010, - kIOPMPreventIdleSleep = 0x0040, - // following bits are used in the capabilites field - kIOPMNotAttainable = 0x0001, // used 
between a driver and its policy-maker - kIOPMChildClamp = 0x0080, // used internally in a power domain parent - kIOPMChildClamp2 = 0x0200, // used internally in a power domain parent - kIOPMDeviceUsable = 0x8000, - kIOPMMaxPerformance = 0x4000, - kIOPMContextRetained = 0x2000, - kIOPMConfigRetained = 0x1000, - kIOPMSleepCapability = 0x0004, - kIOPMRestartCapability = 0x0080, - kIOPMNotPowerManaged = 0x0800, // this is an error return rather than a bit - kIOPMStaticPowerValid = 0x0800, // therefore this bit safely overloads it + // The following bits are used in the input and output power fields. + kIOPMClockNormal = 0x0004, + kIOPMClockRunning = 0x0008, + // Reserved - Used only between root and root parent. + kIOPMAuxPowerOn = 0x0020, + // Reserved - kIOPMPagingAvailable used only by now-defunct paging plexus + kIOPMPagingAvailable = 0x0020, + kIOPMPassThrough = 0x0100, + kIOPMDoze = 0x0400, + // Obsolete - use kIOPMDoze instead of kIOPMSoftSleep + kIOPMSoftSleep = 0x0400, + kIOPMSleep = 0x0001, + kIOPMRestart = 0x0080, + + // The following bits are used in the capabilities field and the power fields + kIOPMPowerOn = 0x0002, + kIOPMPreventSystemSleep = 0x0010, + kIOPMPreventIdleSleep = 0x0040, + + // The following bits are used in the capabilities field only. + // Used between a driver and its policy-maker + kIOPMNotAttainable = 0x0001, + // Used internally in a power domain parent + kIOPMChildClamp = 0x0080, + // Used internally in a power domain parent + kIOPMChildClamp2 = 0x0200, + // Marks device as usable in this state + kIOPMDeviceUsable = 0x8000, + // Device runs at max performance in this state + kIOPMMaxPerformance = 0x4000, + kIOPMContextRetained = 0x2000, + kIOPMConfigRetained = 0x1000, + // Device is capable of system sleep in this state + kIOPMSleepCapability = 0x0004, + kIOPMRestartCapability = 0x0080, + + // Reserved - Error code. 
(this is an error return rather than a bit) + kIOPMNotPowerManaged = 0x0800, + // Therefore this bit safely overloads it + kIOPMStaticPowerValid = 0x0800, - kIOPMCapabilitiesMask = kIOPMPowerOn | kIOPMDeviceUsable | kIOPMMaxPerformance | - kIOPMContextRetained | kIOPMConfigRetained | kIOPMSleepCapability | + kIOPMCapabilitiesMask = kIOPMPowerOn | kIOPMDeviceUsable | + kIOPMMaxPerformance | kIOPMContextRetained | + kIOPMConfigRetained | kIOPMSleepCapability | kIOPMRestartCapability }; enum { - IOPMNotAttainable = kIOPMNotAttainable, - IOPMPowerOn = kIOPMPowerOn, - IOPMClockNormal = kIOPMClockNormal, - IOPMClockRunning = kIOPMClockRunning, - IOPMAuxPowerOn = kIOPMAuxPowerOn, - IOPMDeviceUsable = kIOPMDeviceUsable, - IOPMMaxPerformance = kIOPMMaxPerformance, - IOPMContextRetained = kIOPMContextRetained, - IOPMConfigRetained = kIOPMConfigRetained, - IOPMNotPowerManaged = kIOPMNotPowerManaged, - IOPMPagingAvailable = kIOPMPagingAvailable, - IOPMSoftSleep = kIOPMSoftSleep + IOPMNotAttainable = kIOPMNotAttainable, + IOPMPowerOn = kIOPMPowerOn, + IOPMClockNormal = kIOPMClockNormal, + IOPMClockRunning = kIOPMClockRunning, + IOPMAuxPowerOn = kIOPMAuxPowerOn, + IOPMDeviceUsable = kIOPMDeviceUsable, + IOPMMaxPerformance = kIOPMMaxPerformance, + IOPMContextRetained = kIOPMContextRetained, + IOPMConfigRetained = kIOPMConfigRetained, + IOPMNotPowerManaged = kIOPMNotPowerManaged, + IOPMPagingAvailable = kIOPMPagingAvailable, + IOPMSoftSleep = kIOPMSoftSleep }; enum { - kIOPMNextHigherState = 1, - kIOPMHighestState = 2, - kIOPMNextLowerState = 3, - kIOPMLowestState = 4 + kIOPMNextHigherState = 1, + kIOPMHighestState = 2, + kIOPMNextLowerState = 3, + kIOPMLowestState = 4 }; enum { - IOPMNextHigherState = kIOPMNextHigherState, - IOPMHighestState = kIOPMHighestState, - IOPMNextLowerState = kIOPMNextLowerState, - IOPMLowestState = kIOPMLowestState + IOPMNextHigherState = kIOPMNextHigherState, + IOPMHighestState = kIOPMHighestState, + IOPMNextLowerState = kIOPMNextLowerState, + 
IOPMLowestState = kIOPMLowestState }; - - -enum { // commands on power managment command queue +// Internal commands used by power management command queue +enum { kIOPMBroadcastAggressiveness = 1, kIOPMUnidleDevice }; -enum { // special value means "power consumption unknown" +// Power consumption unknown value +enum { kIOPMUnknown = 0xFFFF }; -// Power events +/******************************************************************************* + * + * Root Domain property keys of interest + * + ******************************************************************************/ + +/* AppleClamshellState + * reflects the state of the clamshell (lid) on a portable. + * It has a boolean value. + * true == clamshell is closed + * false == clamshell is open + * not present == no clamshell on this hardware + */ +#define kAppleClamshellStateKey "AppleClamshellState" + +/* AppleClamshellCausesSleep + * reflects the clamshell close behavior on a portable. + * It has a boolean value. + * true == system will sleep when clamshell is closed + * false == system will not sleep on clamshell close + * (typically external display mode) + * not present == no clamshell on this hardware + */ +#define kAppleClamshellCausesSleepKey "AppleClamshellCausesSleep" + +/******************************************************************************* + * + * Root Domain general interest messages + * + ******************************************************************************/ + +/* kIOPMMessageClamshellStateChange + * Delivered as a general interest notification on the IOPMrootDomain. + * IOPMrootDomain sends this message when state of either AppleClamshellState + * or AppleClamshellCausesSleep changes. If this clamshell change results in + * a sleep, the sleep will initiate soon AFTER delivery of this message. + * The state of both variables is encoded in a bitfield argument sent with + * the message. 
Check bits 0 and 1 using kClamshellStateBit & kClamshellSleepBit + */ +enum { + kClamshellStateBit = (1 << 0), + kClamshellSleepBit = (1 << 1) +}; + +#define kIOPMMessageClamshellStateChange \ + iokit_family_msg(sub_iokit_powermanagement, 0x100) + +/* kIOPMMessageFeatureChange + * Delivered when the set of supported features ("Supported Features" dictionary + * under IOPMrootDomain registry) changes in some way. Typically addition or + * removal of a supported feature. + * RootDomain passes no argument with this message. + */ +#define kIOPMMessageFeatureChange \ + iokit_family_msg(sub_iokit_powermanagement, 0x110) + +/* kIOPMMessageInflowDisableCancelled + * The battery has drained completely to its "Fully Discharged" state. + * If a user process has disabled battery inflow for battery + * calibration, we forcibly re-enable Inflow at this point. + * If inflow HAS been forcibly re-enabled, bit 0 + * (kInflowForciblyEnabledBit) will be set. + */ enum { - kClamshellClosedEventMask = (1<<0), // User closed lid - kDockingBarEventMask = (1<<1), // OBSOLETE - kACPlugEventMask = (1<<2), // User plugged or unplugged adapter - kFrontPanelButtonEventMask = (1<<3), // User hit the front panel button - kBatteryStatusEventMask = (1<<4) // Battery status has changed + kInflowForciblyEnabledBit = (1 << 0) }; -// Power commands issued to root domain +#define kIOPMMessageInternalBatteryFullyDischarged \ + iokit_family_msg(sub_iokit_powermanagement, 0x120) + + +/******************************************************************************* + * + * Power commands issued to root domain + * + * These commands are issued from system drivers only: + * ApplePMU, AppleSMU, IOGraphics, AppleACPIFamily + * + ******************************************************************************/ enum { - kIOPMSleepNow = (1<<0), // put machine to sleep now - kIOPMAllowSleep = (1<<1), // allow idle sleep - kIOPMPreventSleep = (1<<2), // do not allow idle sleep - kIOPMPowerButton = (1<<3), // power 
button was pressed - kIOPMClamshellClosed = (1<<4), // clamshell was closed - kIOPMPowerEmergency = (1<<5), // battery dangerously low - kIOPMDisableClamshell = (1<<6), // do not sleep on clamshell closure - kIOPMEnableClamshell = (1<<7), // sleep on clamshell closure - kIOPMProcessorSpeedChange = (1<<8), // change the processor speed - kIOPMOverTemp = (1<<9) // system dangerously hot + kIOPMSleepNow = (1<<0), // put machine to sleep now + kIOPMAllowSleep = (1<<1), // allow idle sleep + kIOPMPreventSleep = (1<<2), // do not allow idle sleep + kIOPMPowerButton = (1<<3), // power button was pressed + kIOPMClamshellClosed = (1<<4), // clamshell was closed + kIOPMPowerEmergency = (1<<5), // battery dangerously low + kIOPMDisableClamshell = (1<<6), // do not sleep on clamshell closure + kIOPMEnableClamshell = (1<<7), // sleep on clamshell closure + kIOPMProcessorSpeedChange = (1<<8), // change the processor speed + kIOPMOverTemp = (1<<9), // system dangerously hot + kIOPMClamshellOpened = (1<<10) // clamshell was opened }; - // Return codes -// PUBLIC power management features -// NOTE: this is a direct port from classic, some of these bits -// are obsolete but are included for completeness +/******************************************************************************* + * + * Power Management Return Codes + * + ******************************************************************************/ enum { - kPMHasWakeupTimerMask = (1<<0), // 1=wake timer is supported - kPMHasSharedModemPortMask = (1<<1), // Not used - kPMHasProcessorCyclingMask = (1<<2), // 1=processor cycling supported - kPMMustProcessorCycleMask = (1<<3), // Not used - kPMHasReducedSpeedMask = (1<<4), // 1=supports reduced processor speed - kPMDynamicSpeedChangeMask = (1<<5), // 1=supports changing processor speed on the fly - kPMHasSCSIDiskModeMask = (1<<6), // 1=supports using machine as SCSI drive - kPMCanGetBatteryTimeMask = (1<<7), // 1=battery time can be calculated - kPMCanWakeupOnRingMask = (1<<8), 
// 1=machine can wake on modem ring - kPMHasDimmingSupportMask = (1<<9), // 1=has monitor dimming support - kPMHasStartupTimerMask = (1<<10), // 1=can program startup timer - kPMHasChargeNotificationMask = (1<<11), // 1=client can determine charger status/get notifications - kPMHasDimSuspendSupportMask = (1<<12), // 1=can dim diplay to DPMS ('off') state - kPMHasWakeOnNetActivityMask = (1<<13), // 1=supports waking upon receipt of net packet - kPMHasWakeOnLidMask = (1<<14), // 1=can wake upon lid/case opening - kPMCanPowerOffPCIBusMask = (1<<15), // 1=can remove power from PCI bus on sleep - kPMHasDeepSleepMask = (1<<16), // 1=supports deep (hibernation) sleep - kPMHasSleepMask = (1<<17), // 1=machine support low power sleep (ala powerbooks) - kPMSupportsServerModeAPIMask = (1<<18), // 1=supports reboot on AC resume for unexpected power loss - kPMHasUPSIntegrationMask = (1<<19) // 1=supports incorporating UPS devices into power source calcs + kIOPMNoErr = 0, + // Returned by powerStateWillChange and powerStateDidChange: + // Immediate acknowledgement of power state change + kIOPMAckImplied = 0, + // Acknowledgement of power state change will come later + kIOPMWillAckLater = 1, + + // Returned by requestDomainState: + // Unrecognized specification parameter + kIOPMBadSpecification = 4, + // No power state matches search specification + kIOPMNoSuchState = 5, + + // Device cannot change its power for some reason + kIOPMCannotRaisePower = 6, + + // Returned by changeStateTo: + // Requested state doesn't exist + kIOPMParameterError = 7, + // Device not yet fully hooked into power management + kIOPMNotYetInitialized = 8, + + // And the old constants; deprecated + IOPMNoErr = kIOPMNoErr, + IOPMAckImplied = kIOPMAckImplied, + IOPMWillAckLater = kIOPMWillAckLater, + IOPMBadSpecification = kIOPMBadSpecification, + IOPMNoSuchState = kIOPMNoSuchState, + IOPMCannotRaisePower = kIOPMCannotRaisePower, + IOPMParameterError = kIOPMParameterError, + IOPMNotYetInitialized = 
kIOPMNotYetInitialized +}; + + +// IOPMPowerSource class descriptive strings +// Power Source state is published as properties to the IORegistry under these +// keys. +#define kIOPMPSExternalConnectedKey "ExternalConnected" +#define kIOPMPSExternalChargeCapableKey "ExternalChargeCapable" +#define kIOPMPSBatteryInstalledKey "BatteryInstalled" +#define kIOPMPSIsChargingKey "IsCharging" +#define kIOPMFullyChargedKey "FullyCharged" +#define kIOPMPSAtWarnLevelKey "AtWarnLevel" +#define kIOPMPSAtCriticalLevelKey "AtCriticalLevel" +#define kIOPMPSCurrentCapacityKey "CurrentCapacity" +#define kIOPMPSMaxCapacityKey "MaxCapacity" +#define kIOPMPSDesignCapacityKey "DesignCapacity" +#define kIOPMPSTimeRemainingKey "TimeRemaining" +#define kIOPMPSAmperageKey "Amperage" +#define kIOPMPSVoltageKey "Voltage" +#define kIOPMPSCycleCountKey "CycleCount" +#define kIOPMPSMaxErrKey "MaxErr" +#define kIOPMPSAdapterInfoKey "AdapterInfo" +#define kIOPMPSLocationKey "Location" +#define kIOPMPSErrorConditionKey "ErrorCondition" +#define kIOPMPSManufacturerKey "Manufacturer" +#define kIOPMPSManufactureDateKey "ManufactureDate" +#define kIOPMPSModelKey "Model" +#define kIOPMPSSerialKey "Serial" +#define kIOPMDeviceNameKey "DeviceName" +#define kIOPMPSLegacyBatteryInfoKey "LegacyBatteryInfo" +#define kIOPMPSBatteryHealthKey "BatteryHealth" +#define kIOPMPSHealthConfidenceKey "HealthConfidence" + +// Definitions for battery location, in case of multiple batteries. +// A location of 0 is unspecified +// Location is undefined for single battery systems +enum { + kIOPMPSLocationLeft = 1001, + kIOPMPSLocationRight = 1002 }; -// PRIVATE power management features -// NOTE: this is a direct port from classic, some of these bits -// are obsolete but are included for completeness. +// Battery quality health types, specified by BatteryHealth and HealthConfidence +// properties in an IOPMPowerSource battery kext. 
enum { - kPMHasExtdBattInfoMask = (1<<0), // Not used - kPMHasBatteryIDMask = (1<<1), // Not used - kPMCanSwitchPowerMask = (1<<2), // Not used - kPMHasCelsiusCyclingMask = (1<<3), // Not used - kPMHasBatteryPredictionMask = (1<<4), // Not used - kPMHasPowerLevelsMask = (1<<5), // Not used - kPMHasSleepCPUSpeedMask = (1<<6), // Not used - kPMHasBtnIntHandlersMask = (1<<7), // 1=supports individual button interrupt handlers - kPMHasSCSITermPowerMask = (1<<8), // 1=supports SCSI termination power switch - kPMHasADBButtonHandlersMask = (1<<9), // 1=supports button handlers via ADB - kPMHasICTControlMask = (1<<10), // 1=supports ICT control - kPMHasLegacyDesktopSleepMask = (1<<11), // 1=supports 'doze' style sleep - kPMHasDeepIdleMask = (1<<12), // 1=supports Idle2 in hardware - kPMOpenLidPreventsSleepMask = (1<<13), // 1=open case prevent machine from sleeping - kPMClosedLidCausesSleepMask = (1<<14), // 1=case closed (clamshell closed) causes sleep - kPMHasFanControlMask = (1<<15), // 1=machine has software-programmable fan/thermostat controls - kPMHasThermalControlMask = (1<<16), // 1=machine supports thermal monitoring - kPMHasVStepSpeedChangeMask = (1<<17), // 1=machine supports processor voltage/clock change - kPMEnvironEventsPolledMask = (1<<18) // 1=machine doesn't generate pmu env ints, we must poll instead + kIOPMUndefinedValue = 0, + kIOPMPoorValue = 1, + kIOPMFairValue = 2, + kIOPMGoodValue = 3 }; -// DEFAULT public and private features for machines whose device tree -// does NOT contain this information (pre-Core99). 
- -// For Cuda-based Desktops - -#define kStdDesktopPMFeatures kPMHasWakeupTimerMask |\ - kPMHasProcessorCyclingMask |\ - kPMHasDimmingSupportMask |\ - kPMHasStartupTimerMask |\ - kPMSupportsServerModeAPIMask |\ - kPMHasUPSIntegrationMask - -#define kStdDesktopPrivPMFeatures kPMHasExtdBattInfoMask |\ - kPMHasICTControlMask |\ - kPMHasLegacyDesktopSleepMask - -#define kStdDesktopNumBatteries 0 - -// For Wallstreet (PowerBook G3 Series 1998) - -#define kWallstreetPMFeatures kPMHasWakeupTimerMask |\ - kPMHasProcessorCyclingMask |\ - kPMHasReducedSpeedMask |\ - kPMDynamicSpeedChangeMask |\ - kPMHasSCSIDiskModeMask |\ - kPMCanGetBatteryTimeMask |\ - kPMHasDimmingSupportMask |\ - kPMHasChargeNotificationMask |\ - kPMHasDimSuspendSupportMask |\ - kPMHasSleepMask - -#define kWallstreetPrivPMFeatures kPMHasExtdBattInfoMask |\ - kPMHasBatteryIDMask |\ - kPMCanSwitchPowerMask |\ - kPMHasADBButtonHandlersMask |\ - kPMHasSCSITermPowerMask |\ - kPMHasICTControlMask |\ - kPMClosedLidCausesSleepMask |\ - kPMEnvironEventsPolledMask - -#define kStdPowerBookPMFeatures kWallstreetPMFeatures -#define kStdPowerBookPrivPMFeatures kWallstreetPrivPMFeatures - -#define kStdPowerBookNumBatteries 2 - -// For 101 (PowerBook G3 Series 1999) - -#define k101PMFeatures kPMHasWakeupTimerMask |\ - kPMHasProcessorCyclingMask |\ - kPMHasReducedSpeedMask |\ - kPMDynamicSpeedChangeMask |\ - kPMHasSCSIDiskModeMask |\ - kPMCanGetBatteryTimeMask |\ - kPMHasDimmingSupportMask |\ - kPMHasChargeNotificationMask |\ - kPMHasDimSuspendSupportMask |\ - kPMHasSleepMask |\ - kPMHasUPSIntegrationMask - -#define k101PrivPMFeatures kPMHasExtdBattInfoMask |\ - kPMHasBatteryIDMask |\ - kPMCanSwitchPowerMask |\ - kPMHasADBButtonHandlersMask |\ - kPMHasSCSITermPowerMask |\ - kPMHasICTControlMask |\ - kPMClosedLidCausesSleepMask |\ - kPMEnvironEventsPolledMask - -#define IOPMNoErr 0 // normal return - - // returned by powerStateWillChange and powerStateDidChange: -#define IOPMAckImplied 0 // acknowledgement of power state 
change is implied -#define IOPMWillAckLater 1 // acknowledgement of power state change will come later - - // returned by requestDomainState -#define IOPMBadSpecification 4 // unrecognized specification parameter -#define IOPMNoSuchState 5 // no power state matches search specification - -#define IOPMCannotRaisePower 6 // a device cannot change its power for some reason - - // returned by changeStateTo -#define IOPMParameterError 7 // requested state doesn't exist -#define IOPMNotYetInitialized 8 // device not yet fully hooked into power management "graph" - - - // used by Root Domain UserClient +// Battery's time remaining estimate is invalid this long (seconds) after a wake +#define kIOPMPSInvalidWakeSecondsKey "BatteryInvalidWakeSeconds" + +// Battery must wait this long (seconds) after being completely charged before +// the battery is settled. +#define kIOPMPSPostChargeWaitSecondsKey "PostChargeWaitSeconds" + +// Battery must wait this long (seconds) after being completely discharged +// before the battery is settled. +#define kIOPMPSPostDishargeWaitSecondsKey "PostDischargeWaitSeconds" + + + +// PM Settings Controller setting types +// Settings types used primarily with: +// IOPMrootDomain::registerPMSettingController +// The values are identical to the similarly named keys for use in user space +// PM settings work. Those keys are defined in IOPMLibPrivate.h. 
+#define kIOPMSettingWakeOnRingKey "Wake On Modem Ring" +#define kIOPMSettingRestartOnPowerLossKey "Automatic Restart On Power Loss" +#define kIOPMSettingWakeOnACChangeKey "Wake On AC Change" +#define kIOPMSettingSleepOnPowerButtonKey "Sleep On Power Button" +#define kIOPMSettingWakeOnClamshellKey "Wake On Clamshell Open" +#define kIOPMSettingReduceBrightnessKey "ReduceBrightness" +#define kIOPMSettingDisplaySleepUsesDimKey "Display Sleep Uses Dim" +#define kIOPMSettingTimeZoneOffsetKey "TimeZoneOffsetSeconds" + +// Setting controlling drivers can register to receive scheduled wake data +// Either in "CF seconds" type, or structured calendar data in a formatted +// IOPMCalendarStruct defined below. +#define kIOPMSettingAutoWakeSecondsKey "wake" +#define kIOPMSettingAutoWakeCalendarKey "WakeByCalendarDate" +#define kIOPMSettingAutoPowerSecondsKey "poweron" +#define kIOPMSettingAutoPowerCalendarKey "PowerByCalendarDate" + +// Debug seconds auto wake +// Used by sleep cycling debug tools +#define kIOPMSettingDebugWakeRelativeKey "WakeRelativeToSleep" +#define kIOPMSettingDebugPowerRelativeKey "PowerRelativeToShutdown" + +struct IOPMCalendarStruct { + UInt32 year; + UInt8 month; + UInt8 day; + UInt8 hour; + UInt8 minute; + UInt8 second; +}; +typedef struct IOPMCalendarStruct IOPMCalendarStruct; +// SetAggressiveness types enum { kPMGeneralAggressiveness = 0, kPMMinutesToDim, @@ -281,21 +378,21 @@ enum { kIOPMExternalPower }; -#define kAppleClamshellStateKey "AppleClamshellState" -#define kIOREMSleepEnabledKey "REMSleepEnabled" +#define kIOREMSleepEnabledKey "REMSleepEnabled" // Strings for deciphering the dictionary returned from IOPMCopyBatteryInfo -#define kIOBatteryInfoKey "IOBatteryInfo" -#define kIOBatteryCurrentChargeKey "Current" -#define kIOBatteryCapacityKey "Capacity" -#define kIOBatteryFlagsKey "Flags" -#define kIOBatteryVoltageKey "Voltage" -#define kIOBatteryAmperageKey "Amperage" +#define kIOBatteryInfoKey "IOBatteryInfo" +#define 
kIOBatteryCurrentChargeKey "Current" +#define kIOBatteryCapacityKey "Capacity" +#define kIOBatteryFlagsKey "Flags" +#define kIOBatteryVoltageKey "Voltage" +#define kIOBatteryAmperageKey "Amperage" +#define kIOBatteryCycleCountKey "Cycle Count" enum { - kIOBatteryInstalled = (1 << 2), - kIOBatteryCharge = (1 << 1), - kIOBatteryChargerConnect = (1 << 0) + kIOBatteryInstalled = (1 << 2), + kIOBatteryCharge = (1 << 1), + kIOBatteryChargerConnect = (1 << 0) }; @@ -306,20 +403,7 @@ enum { // Apple private Legacy messages for re-routing AutoWake and AutoPower messages to the PMU // through newer user space IOPMSchedulePowerEvent API #define kIOPMUMessageLegacyAutoWake iokit_family_msg(sub_iokit_pmu, 0x200) -#define kIOPMUMessageLegacyAutoPower iokit_family_msg(sub_iokit_pmu, 0x210) - -// These flags are deprecated. Use the version with the kIOPM prefix below. -enum { - kACInstalled = kIOBatteryChargerConnect, - kBatteryCharging = kIOBatteryCharge, - kBatteryInstalled = kIOBatteryInstalled, - kUPSInstalled = (1<<3), - kBatteryAtWarn = (1<<4), - kBatteryDepleted = (1<<5), - kACnoChargeCapability = (1<<6), // AC adapter cannot charge battery - kRawLowBattery = (1<<7), // used only by Platform Expert - kForceLowSpeed = (1<<8) // set by Platfm Expert, chk'd by Pwr Plugin}; -}; +#define kIOPMUMessageLegacyAutoPower iokit_family_msg(sub_iokit_pmu, 0x210) // For use with IOPMPowerSource bFlags #define IOPM_POWER_SOURCE_REV 2 @@ -355,17 +439,17 @@ enum { }; struct stateChangeNote{ - IOPMPowerFlags stateFlags; - unsigned long stateNum; - void * powerRef; + IOPMPowerFlags stateFlags; + unsigned long stateNum; + void * powerRef; }; typedef struct stateChangeNote stateChangeNote; struct IOPowerStateChangeNotification { - void * powerRef; - unsigned long returnValue; - unsigned long stateNumber; - IOPMPowerFlags stateFlags; + void * powerRef; + unsigned long returnValue; + unsigned long stateNumber; + IOPMPowerFlags stateFlags; }; typedef struct IOPowerStateChangeNotification 
IOPowerStateChangeNotification; typedef IOPowerStateChangeNotification sleepWakeNote; diff --git a/iokit/IOKit/pwr_mgt/IOPMDeprecated.h b/iokit/IOKit/pwr_mgt/IOPMDeprecated.h new file mode 100644 index 000000000..67932d8a9 --- /dev/null +++ b/iokit/IOKit/pwr_mgt/IOPMDeprecated.h @@ -0,0 +1,171 @@ +/* + * Copyright (c) 1998-2005 Apple Computer, Inc. All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * The contents of this file constitute Original Code as defined in and + * are subject to the Apple Public Source License Version 1.1 (the + * "License"). You may not use this file except in compliance with the + * License. Please obtain a copy of the License at + * http://www.apple.com/publicsource and read it before using this file. + * + * This Original Code and all software distributed under the License are + * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the + * License for the specific language governing rights and limitations + * under the License. 
+ * + * @APPLE_LICENSE_HEADER_END@ + */ + +#ifndef _IOPMDeprecated_h_ +#define _IOPMDeprecated_h_ + +#ifdef __ppc__ + +// Power events +enum { + kClamshellClosedEventMask = (1<<0), // User closed lid + kDockingBarEventMask = (1<<1), // OBSOLETE + kACPlugEventMask = (1<<2), // User plugged or unplugged adapter + kFrontPanelButtonEventMask = (1<<3), // User hit the front panel button + kBatteryStatusEventMask = (1<<4) // Battery status has changed +}; + +// PUBLIC power management features +// NOTE: this is a direct port from classic, some of these bits +// are obsolete but are included for completeness +enum { + kPMHasWakeupTimerMask = (1<<0), // 1=wake timer is supported + kPMHasSharedModemPortMask = (1<<1), // Not used + kPMHasProcessorCyclingMask = (1<<2), // 1=processor cycling supported + kPMMustProcessorCycleMask = (1<<3), // Not used + kPMHasReducedSpeedMask = (1<<4), // 1=supports reduced processor speed + kPMDynamicSpeedChangeMask = (1<<5), // 1=supports changing processor speed on the fly + kPMHasSCSIDiskModeMask = (1<<6), // 1=supports using machine as SCSI drive + kPMCanGetBatteryTimeMask = (1<<7), // 1=battery time can be calculated + kPMCanWakeupOnRingMask = (1<<8), // 1=machine can wake on modem ring + kPMHasDimmingSupportMask = (1<<9), // 1=has monitor dimming support + kPMHasStartupTimerMask = (1<<10), // 1=can program startup timer + kPMHasChargeNotificationMask = (1<<11), // 1=client can determine charger status/get notifications + kPMHasDimSuspendSupportMask = (1<<12), // 1=can dim diplay to DPMS ('off') state + kPMHasWakeOnNetActivityMask = (1<<13), // 1=supports waking upon receipt of net packet + kPMHasWakeOnLidMask = (1<<14), // 1=can wake upon lid/case opening + kPMCanPowerOffPCIBusMask = (1<<15), // 1=can remove power from PCI bus on sleep + kPMHasDeepSleepMask = (1<<16), // 1=supports deep (hibernation) sleep + kPMHasSleepMask = (1<<17), // 1=machine support low power sleep (ala powerbooks) + kPMSupportsServerModeAPIMask = (1<<18), // 
1=supports reboot on AC resume for unexpected power loss + kPMHasUPSIntegrationMask = (1<<19) // 1=supports incorporating UPS devices into power source calcs +}; + +// PRIVATE power management features +// NOTE: this is a direct port from classic, some of these bits +// are obsolete but are included for completeness. +enum { + kPMHasExtdBattInfoMask = (1<<0), // Not used + kPMHasBatteryIDMask = (1<<1), // Not used + kPMCanSwitchPowerMask = (1<<2), // Not used + kPMHasCelsiusCyclingMask = (1<<3), // Not used + kPMHasBatteryPredictionMask = (1<<4), // Not used + kPMHasPowerLevelsMask = (1<<5), // Not used + kPMHasSleepCPUSpeedMask = (1<<6), // Not used + kPMHasBtnIntHandlersMask = (1<<7), // 1=supports individual button interrupt handlers + kPMHasSCSITermPowerMask = (1<<8), // 1=supports SCSI termination power switch + kPMHasADBButtonHandlersMask = (1<<9), // 1=supports button handlers via ADB + kPMHasICTControlMask = (1<<10), // 1=supports ICT control + kPMHasLegacyDesktopSleepMask = (1<<11), // 1=supports 'doze' style sleep + kPMHasDeepIdleMask = (1<<12), // 1=supports Idle2 in hardware + kPMOpenLidPreventsSleepMask = (1<<13), // 1=open case prevents machine from sleeping + kPMClosedLidCausesSleepMask = (1<<14), // 1=case closed (clamshell closed) causes sleep + kPMHasFanControlMask = (1<<15), // 1=machine has software-programmable fan/thermostat controls + kPMHasThermalControlMask = (1<<16), // 1=machine supports thermal monitoring + kPMHasVStepSpeedChangeMask = (1<<17), // 1=machine supports processor voltage/clock change + kPMEnvironEventsPolledMask = (1<<18) // 1=machine doesn't generate pmu env ints, we must poll instead +}; + +// DEFAULT public and private features for machines whose device tree +// does NOT contain this information (pre-Core99). 
+ +// For Cuda-based Desktops + +#define kStdDesktopPMFeatures kPMHasWakeupTimerMask |\ + kPMHasProcessorCyclingMask |\ + kPMHasDimmingSupportMask |\ + kPMHasStartupTimerMask |\ + kPMSupportsServerModeAPIMask |\ + kPMHasUPSIntegrationMask + +#define kStdDesktopPrivPMFeatures kPMHasExtdBattInfoMask |\ + kPMHasICTControlMask |\ + kPMHasLegacyDesktopSleepMask + +#define kStdDesktopNumBatteries 0 + +// For Wallstreet (PowerBook G3 Series 1998) + +#define kWallstreetPMFeatures kPMHasWakeupTimerMask |\ + kPMHasProcessorCyclingMask |\ + kPMHasReducedSpeedMask |\ + kPMDynamicSpeedChangeMask |\ + kPMHasSCSIDiskModeMask |\ + kPMCanGetBatteryTimeMask |\ + kPMHasDimmingSupportMask |\ + kPMHasChargeNotificationMask |\ + kPMHasDimSuspendSupportMask |\ + kPMHasSleepMask + +#define kWallstreetPrivPMFeatures kPMHasExtdBattInfoMask |\ + kPMHasBatteryIDMask |\ + kPMCanSwitchPowerMask |\ + kPMHasADBButtonHandlersMask |\ + kPMHasSCSITermPowerMask |\ + kPMHasICTControlMask |\ + kPMClosedLidCausesSleepMask |\ + kPMEnvironEventsPolledMask + +#define kStdPowerBookPMFeatures kWallstreetPMFeatures +#define kStdPowerBookPrivPMFeatures kWallstreetPrivPMFeatures + +#define kStdPowerBookNumBatteries 2 + +// For 101 (PowerBook G3 Series 1999) + +#define k101PMFeatures kPMHasWakeupTimerMask |\ + kPMHasProcessorCyclingMask |\ + kPMHasReducedSpeedMask |\ + kPMDynamicSpeedChangeMask |\ + kPMHasSCSIDiskModeMask |\ + kPMCanGetBatteryTimeMask |\ + kPMHasDimmingSupportMask |\ + kPMHasChargeNotificationMask |\ + kPMHasDimSuspendSupportMask |\ + kPMHasSleepMask |\ + kPMHasUPSIntegrationMask + +#define k101PrivPMFeatures kPMHasExtdBattInfoMask |\ + kPMHasBatteryIDMask |\ + kPMCanSwitchPowerMask |\ + kPMHasADBButtonHandlersMask |\ + kPMHasSCSITermPowerMask |\ + kPMHasICTControlMask |\ + kPMClosedLidCausesSleepMask |\ + kPMEnvironEventsPolledMask + + +// These flags are deprecated. 
Use the version with the kIOPM prefix in IOPM.h +enum { + kACInstalled = (1<<0), + kBatteryCharging = (1<<1), + kBatteryInstalled = (1<<2), + kUPSInstalled = (1<<3), + kBatteryAtWarn = (1<<4), + kBatteryDepleted = (1<<5), + kACnoChargeCapability = (1<<6), // AC adapter cannot charge battery + kRawLowBattery = (1<<7), // used only by Platform Expert + kForceLowSpeed = (1<<8) // set by Platform Expert, checked by Power Plugin +}; + +#endif /* __ppc__ */ +#endif /* _IOPMDeprecated_h_ */ diff --git a/iokit/IOKit/pwr_mgt/IOPMPowerSource.h b/iokit/IOKit/pwr_mgt/IOPMPowerSource.h index db54beff3..b8e4889e6 100644 --- a/iokit/IOKit/pwr_mgt/IOPMPowerSource.h +++ b/iokit/IOKit/pwr_mgt/IOPMPowerSource.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 1998-2005 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -19,59 +19,244 @@ * * @APPLE_LICENSE_HEADER_END@ */ + +#ifndef _IOPMPowerSource_h_ +#define _IOPMPowerSource_h_ + #include +#include #include #include -#include "IOPM.h" - -class ApplePMU; +#include -const unsigned long kSecondsPerHour = (60*60); -const unsigned long kTenMinutesInSeconds = (10 * 60); +enum { + kSecondsPerHour = 3600, + kTenMinutesInSeconds = 600 +}; -// our battery (power source) object +/* class IOPMPowerSource + * + * See IOKit/pwr_mgt/IOPM.h for power source keys relevant to this class. These + * report-type keys are required for calls to IOPMPowerSource::setReportables(), + * and they define the IORegistry interface through which data is passed back + * up to the rest of the system. + * + * A subclassing driver that doesn't want to do anything fancy should: + * 1. Subclass IOPMPowerSource + * 2. Install its own battery change notifications or polling routine that can + * converse with actual battery hardware. + * 3. When battery state changes, change the relevant member variables + * through setCurrentCapacity() style accessors. + * 4. 
Call updateStatus() on itself when all such settings have been updated. + * + * The subclass driver should also initially populate its settings and call + * updateStatus() on launch. + * + * + * Settings + * + * ExternalConnected + * Type: bool + * IORegistry Key: kIOPMPSExternalConnectedKey + * True if computer is drawing external power + * + * ExternalChargeCapable + * Type: bool + * IORegistry Key: kIOPMPSExternalChargeCapableKey + * True if external power is capable of charging internal battery + * + * BatteryInstalled + * Type: bool + * IORegistry Key: kIOPMPSBatteryInstalledKey + * True if a battery is present; false if removed + * + * IsCharging + * Type: bool + * IORegistry Key: kIOPMPSIsChargingKey + * True if battery is charging itself from external power + * + * AtWarnLevel + * Type: bool + * IORegistry Key: kIOPMPSAtWarnLevelKey + * True if draining battery capacity and past warn level + * + * AtCriticalLevel + * Type: bool + * IORegistry Key: kIOPMPSAtCriticalLevelKey + * True if draining battery capacity and past critical level + * + * CurrentCapacity + * MaxCapacity + * Type: unsigned int + * IORegistry Key: kIOPMPSCurrentCapacityKey, kIOPMPSMaxCapacityKey + * Capacity measured in mAh + * + * TimeRemaining + * Type: int + * IORegistry Key: kIOPMPSTimeRemainingKey + * Time remaining measured in minutes + * + * Amperage + * Type: int + * IORegistry Key: kIOPMPSAmperageKey + * Current is measured in mA + * + * Voltage + * Type: unsigned int + * IORegistry Key: kIOPMPSVoltageKey + * Voltage measured in mV + * + * CycleCount + * Type: unsigned int + * IORegistry Key: kIOPMPSCycleCountKey + * Number of charge/discharge cycles + * + * AdapterInfo + * Type: int + * IORegistry Key: kIOPMPSAdapterInfoKey + * Power adapter information + * + * Location + * Type: int + * IORegistry Key: kIOPMPSLocationKey + * Clue about battery's location in machine - Left vs. 
Right + * + * ErrorCondition + * Type: OSSymbol + * IORegistry Key: kIOPMPSErrorConditionKey + * String describing error state of battery + * + * Manufacturer + * Type: OSSymbol + * IORegistry Key: kIOPMPSManufacturerKey + * String describing battery manufacturer + * + * Model + * Type: OSSymbol + * IORegistry Key: kIOPMPSModelKey + * String describing model number + * + * Serial + * Type: OSSymbol + * IORegistry Key: kIOPMPSSerialKey + * String describing serial number or unique info + * + * LegacyIOBatteryInfo + * Type: OSDictionary + * IORegistry Key: kIOPMPSLegacyBatteryInfoKey + * Dictionary conforming to the OS X 10.0-10.4 + */ -class IOPMPowerSource : public OSObject +class IOPMPowerSource : public IOService { OSDeclareDefaultStructors(IOPMPowerSource) + friend class IOPMPowerSourceList; + protected: + // Tracking for IOPMPowerSourceList + IOPMPowerSource *nextInList; + + OSDictionary *properties; - UInt32 bFlags; - UInt32 bTimeRemaining; - UInt16 bCurCapacity; - UInt16 bMaxCapacity; - SInt16 bCurrent; - UInt16 bVoltage; - UInt16 bBatteryIndex; + const OSSymbol *externalConnectedKey; + const OSSymbol *externalChargeCapableKey; + const OSSymbol *batteryInstalledKey; + const OSSymbol *chargingKey; + const OSSymbol *warnLevelKey; + const OSSymbol *criticalLevelKey; + const OSSymbol *currentCapacityKey; + const OSSymbol *maxCapacityKey; + const OSSymbol *timeRemainingKey; + const OSSymbol *amperageKey; + const OSSymbol *voltageKey; + const OSSymbol *cycleCountKey; + const OSSymbol *adapterInfoKey; + const OSSymbol *locationKey; + const OSSymbol *errorConditionKey; + const OSSymbol *manufacturerKey; + const OSSymbol *modelKey; + const OSSymbol *serialKey; + const OSSymbol *batteryInfoKey; public: - IOPMPowerSource * nextInList; - - bool init (unsigned short whichBatteryIndex); - unsigned long capacityPercentRemaining (void); - bool atWarnLevel (void); - bool depleted (void); +/*! @function powerSource + @abstract Creates a new IOPMPowerSource nub. 
Must be attached to IORegistry, + and registered by provider. +*/ + static IOPMPowerSource *powerSource(void); - // accessors + virtual bool init(void); + + virtual void free(void); - bool isInstalled (void); - bool isCharging (void); - bool acConnected (void); - unsigned long timeRemaining (void); - unsigned long maxCapacity (void); - unsigned long curCapacity (void); - long currentDrawn (void); - unsigned long voltage (void); +/*! @function updateStatus + @abstract Must be called by physical battery controller when battery state + has changed significantly. + @discussion The system will not poll this object for battery updates. Rather \ + the battery's controller must call updateStatus() every time state changes \ + and the settings will be relayed to higher levels of power management. \ + The subclassing driver should override this only if the driver needs to add \ + new settings to the base class. +*/ + virtual void updateStatus(void); - // calculations +/* Public accessors for battery state + */ + bool externalConnected(void); + bool externalChargeCapable(void); + bool batteryInstalled(void); + bool isCharging(void); + bool atWarnLevel(void); + bool atCriticalLevel(void); - // function updateStatus is called whenever the system needs - // to obtain the latest power source state...must be overridden - // by subclasses. - virtual void updateStatus (void); -}; + unsigned int currentCapacity(void); + unsigned int maxCapacity(void); + unsigned int capacityPercentRemaining(void); + int timeRemaining(void); + int amperage(void); + unsigned int voltage(void); + unsigned int cycleCount(void); + int adapterInfo(void); + int location(void); + + OSSymbol *errorCondition(void); + OSSymbol *manufacturer(void); + OSSymbol *model(void); + OSSymbol *serial(void); + OSDictionary *legacyIOBatteryInfo(void); + +protected: +/* Protected "setter" methods for subclasses + * Subclasses should use these setters to modify all battery properties. 
+ * + * Subclasses must follow all property changes with a call to updateStatus() + * to flush settings changes to upper level battery API clients. + * + */ + void setExternalConnected(bool); + void setExternalChargeCapable(bool); + void setBatteryInstalled(bool); + void setIsCharging(bool); + void setAtWarnLevel(bool); + void setAtCriticalLevel(bool); + void setCurrentCapacity(unsigned int); + void setMaxCapacity(unsigned int); + void setTimeRemaining(int); + void setAmperage(int); + void setVoltage(unsigned int); + void setCycleCount(unsigned int); + void setAdapterInfo(int); + void setLocation(int); + void setErrorCondition(OSSymbol *); + void setManufacturer(OSSymbol *); + void setModel(OSSymbol *); + void setSerial(OSSymbol *); + void setLegacyIOBatteryInfo(OSDictionary *); + +}; +#endif diff --git a/iokit/IOKit/pwr_mgt/IOPMPowerSourceList.h b/iokit/IOKit/pwr_mgt/IOPMPowerSourceList.h index e46736196..eda4983d0 100644 --- a/iokit/IOKit/pwr_mgt/IOPMPowerSourceList.h +++ b/iokit/IOKit/pwr_mgt/IOPMPowerSourceList.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 1998-2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 1998-2005 Apple Computer, Inc. All rights reserved. 
* * @APPLE_LICENSE_HEADER_START@ * @@ -26,27 +26,23 @@ class IOPMPowerSource; class IOPMPowerSourceList : public OSObject { -OSDeclareDefaultStructors(IOPMPowerSourceList) - -private: - -IOPMPowerSource * firstItem; // pointer to first power source in list -unsigned long length; // how many power sources are in the list - - -public: -void initialize ( void ); - -IOReturn addToList ( IOPMPowerSource * newPowerSource ); - -IOPMPowerSource * firstInList ( void ); - -IOPMPowerSource * nextInList ( IOPMPowerSource * currentItem ); - -unsigned long numberOfItems ( void ); - -IOReturn removeFromList ( IOPMPowerSource * theItem ); - -void free ( void ); + OSDeclareDefaultStructors(IOPMPowerSourceList) + private: + // pointer to first power source in list + IOPMPowerSource *firstItem; + + // how many power sources are in the list + unsigned long length; + + public: + void initialize(void); + void free(void); + + unsigned long numberOfItems(void); + IOReturn addToList(IOPMPowerSource *newPowerSource); + IOReturn removeFromList(IOPMPowerSource *theItem); + + IOPMPowerSource *firstInList(void); + IOPMPowerSource *nextInList(IOPMPowerSource *currentItem); }; diff --git a/iokit/IOKit/pwr_mgt/Makefile b/iokit/IOKit/pwr_mgt/Makefile index 34e8b138c..5b6f80277 100644 --- a/iokit/IOKit/pwr_mgt/Makefile +++ b/iokit/IOKit/pwr_mgt/Makefile @@ -30,7 +30,7 @@ EXPINC_SUBDIRS_I386 = ${INSTINC_SUBDIRS_I386} ALL_HEADERS = $(shell (cd $(SOURCE); echo *.h)) -INSTALL_MI_LIST = IOPMLibDefs.h IOPM.h +INSTALL_MI_LIST = IOPMLibDefs.h IOPM.h IOPMDeprecated.h INSTALL_MI_LCL_LIST = "" INSTALL_MI_DIR = $(MI_DIR) diff --git a/iokit/IOKit/pwr_mgt/RootDomain.h b/iokit/IOKit/pwr_mgt/RootDomain.h index 8984051fe..df1ded306 100644 --- a/iokit/IOKit/pwr_mgt/RootDomain.h +++ b/iokit/IOKit/pwr_mgt/RootDomain.h @@ -28,8 +28,6 @@ class IOPMPowerStateQueue; class RootDomainUserClient; -#define kRootDomainSupportedFeatures "Supported Features" - enum { kRootDomainSleepNotSupported = 0x00000000, 
kRootDomainSleepSupported = 0x00000001, @@ -37,31 +35,35 @@ enum { kPCICantSleep = 0x00000004 }; -// Constants for use as arguments to the settings callback PMU/SMU defines -// with registerPMSettingsController +#define kRootDomainSupportedFeatures "Supported Features" + +// Supported Feature bitfields for IOPMrootDomain::publishFeature() enum { - kIOPMAutoWakeSetting = 1, - kIOPMAutoPowerOnSetting, - kIOPMWakeOnRingSetting, - kIOPMAutoRestartOnPowerLossSetting, - kIOPMWakeOnLidSetting, - kIOPMWakeOnACChangeSetting, - kIOPMTimeZoneSetting + kIOPMSupportedOnAC = 1<<0, + kIOPMSupportedOnBatt = 1<<1, + kIOPMSupportedOnUPS = 1<<2 }; -typedef int IOPMSystemSettingType; - - -typedef IOReturn (*IOPMSettingControllerCallback)(IOPMSystemSettingType arg_type, int arg_val, void *info); +typedef IOReturn (*IOPMSettingControllerCallback) \ + (OSObject *target, const OSSymbol *type, \ + OSObject *val, uintptr_t refcon); extern "C" { - IONotifier * registerSleepWakeInterest(IOServiceInterestHandler, void *, void * = 0); - IONotifier * registerPrioritySleepWakeInterest(IOServiceInterestHandler handler, void * self, void * ref = 0); - IOReturn acknowledgeSleepWakeNotification(void * ); - IOReturn vetoSleepWakeNotification(void * PMrefcon); - IOReturn rootDomainRestart ( void ); - IOReturn rootDomainShutdown ( void ); + IONotifier * registerSleepWakeInterest( + IOServiceInterestHandler, void *, void * = 0); + + IONotifier * registerPrioritySleepWakeInterest( + IOServiceInterestHandler handler, + void * self, void * ref = 0); + + IOReturn acknowledgeSleepWakeNotification(void * ); + + IOReturn vetoSleepWakeNotification(void * PMrefcon); + + IOReturn rootDomainRestart ( void ); + + IOReturn rootDomainShutdown ( void ); } #define IOPM_ROOTDOMAIN_REV 2 @@ -90,24 +92,95 @@ OSDeclareDefaultStructors(IOPMrootDomain) void stopIgnoringClamshellEventsDuringWakeup ( void ); void wakeFromDoze( void ); void broadcast_it (unsigned long, unsigned long ); + + // KEXT driver announces support of 
power management feature void publishFeature( const char *feature ); + + // KEXT driver announces support of power management feature + // And specifies power sources with kIOPMSupportedOn{AC/Batt/UPS} bitfield. + // Returns, through uniqueFeatureID, a unique uint32_t identifier for later removing support for this + // feature. + // NULL is acceptable for uniqueFeatureID for kexts without plans to unload. + void publishFeature( const char *feature, + uint32_t supportedWhere, + uint32_t *uniqueFeatureID); + + // KEXT driver announces removal of a previously published power management + // feature. Pass 'uniqueFeatureID' returned from publishFeature() + IOReturn removePublishedFeature( uint32_t removeFeatureID ); + void unIdleDevice( IOService *, unsigned long ); void announcePowerSourceChange( void ); - + // Override of these methods for logging purposes. virtual IOReturn changePowerStateTo ( unsigned long ordinal ); virtual IOReturn changePowerStateToPriv ( unsigned long ordinal ); - IOReturn registerPMSettingController(IOPMSettingControllerCallback, void *); +/*! @function copyPMSetting + @abstract Copy the current value for a PM setting. Returns OSNumber or + OSData depending on the setting. + @param whichSetting Name of the desired setting. + @result OSObject *value if valid, NULL otherwise. */ + OSObject *copyPMSetting(OSSymbol *whichSetting); + +/*! @function registerPMSettingController + @abstract Register for callbacks on changes to certain PM settings. + @param settings NULL terminated array of OSSymbol pointers, each naming a PM + setting that the caller is interested in and wants to get callbacks for. + @param callout C function ptr or member function cast as such. + @param target The target of the callback, usually 'this' + @param refcon Will be passed to caller in callback; for caller's use. + @param handle Caller should keep the OSObject * returned here. If non-NULL, + handle will have a retain count of 1 on return. 
To deregister, pass to + unregisterPMSettingController() + @result kIOReturnSuccess on success. */ + IOReturn registerPMSettingController( + const OSSymbol *settings[], + IOPMSettingControllerCallback callout, + OSObject *target, + uintptr_t refcon, + OSObject **handle); // out param + +/*! @function registerPMSettingController + @abstract Register for callbacks on changes to certain PM settings. + @param settings NULL terminated array of OSSymbol pointers, each naming a PM + setting that the caller is interested in and wants to get callbacks for. + @param supportedPowerSources bitfield indicating which power sources these + settings are supported for (kIOPMSupportedOnAC, etc.) + @param callout C function ptr or member function cast as such. + @param target The target of the callback, usually 'this' + @param refcon Will be passed to caller in callback; for caller's use. + @param handle Caller should keep the OSObject * returned here. If non-NULL, + handle will have a retain count of 1 on return. To deregister, pass to + unregisterPMSettingController() + @result kIOReturnSuccess on success. 
*/ + IOReturn registerPMSettingController( + const OSSymbol *settings[], + uint32_t supportedPowerSources, + IOPMSettingControllerCallback callout, + OSObject *target, + uintptr_t refcon, + OSObject **handle); // out param private: - class IORootParent * patriarch; // points to our parent - long sleepSlider; // pref: idle time before idle sleep - long longestNonSleepSlider; // pref: longest of other idle times - long extraSleepDelay; // sleepSlider - longestNonSleepSlider - thread_call_t extraSleepTimer; // used to wait between say display idle and system idle - thread_call_t clamshellWakeupIgnore; // Used to ignore clamshell close events while we're waking from sleep + // Points to our parent + class IORootParent * patriarch; + + // Pref: idle time before idle sleep + long sleepSlider; + + // Pref: longest of other idle times (disk and display) + long longestNonSleepSlider; + + // Difference between sleepSlider and longestNonSleepSlider + long extraSleepDelay; + + // Used to wait between say display idle and system idle + thread_call_t extraSleepTimer; + + // Used to ignore clamshell close events while we're waking from sleep + thread_call_t clamshellWakeupIgnore; virtual void powerChangeDone ( unsigned long ); virtual void command_received ( void *, void * , void * , void *); @@ -127,13 +200,16 @@ OSDeclareDefaultStructors(IOPMrootDomain) static bool displayWranglerPublished( void * target, void * refCon, IOService * newService); - static bool batteryLocationPublished( void * target, void * refCon, + static bool batteryPublished( void * target, void * refCon, IOService * resourceService ); + void adjustPowerState ( void ); void setQuickSpinDownTimeout ( void ); - void adjustPowerState( void ); void restoreUserSpinDownTimeout ( void ); - + + bool shouldSleepOnClamshellClosed (void ); + void sendClientClamshellNotification ( void ); + IOLock *featuresDictLock; // guards supportedFeatures IOPMPowerStateQueue *pmPowerStateQueue; unsigned int user_spindown; // User's 
selected disk spindown value @@ -149,25 +225,29 @@ OSDeclareDefaultStructors(IOPMrootDomain) unsigned int acAdaptorConnect:1; unsigned int ignoringClamshellDuringWakeup:1; - unsigned int reservedA:6; + unsigned int clamshellIsClosed:1; + unsigned int clamshellExists:1; + unsigned int reservedA:4; unsigned char reservedB[3]; - struct PMSettingCtrl { - IOPMSettingControllerCallback func; - void *refcon; - }; + OSArray *allowedPMSettings; + + // Settings controller info + IORecursiveLock *settingsCtrlLock; + OSDictionary *settingsCallbacks; + OSDictionary *fPMSettingsDict; + IOReturn setPMSetting(const OSSymbol *, OSObject *); + + thread_call_t diskSyncCalloutEntry; + IONotifier *_batteryPublishNotifier; + IONotifier *_displayWranglerNotifier; - // Private helper to call PM setting controller - IOReturn setPMSetting(int type, OSNumber *); - struct ExpansionData { - PMSettingCtrl *_settingController; - thread_call_t diskSyncCalloutEntry; - IONotifier *_batteryLocationNotifier; - IONotifier *_displayWranglerNotifier; }; ExpansionData *_reserved; IOOptionBits platformSleepSupport; + + friend class PMSettingObject; }; class IORootParent: public IOService @@ -179,6 +259,8 @@ OSDeclareDefaultStructors(IORootParent) public: + virtual IOReturn changePowerStateToPriv ( unsigned long ordinal ); + bool start ( IOService * nub ); void shutDownSystem ( void ); void restartSystem ( void ); diff --git a/iokit/Kernel/IOBufferMemoryDescriptor.cpp b/iokit/Kernel/IOBufferMemoryDescriptor.cpp index 337c16895..0d934cdb8 100644 --- a/iokit/Kernel/IOBufferMemoryDescriptor.cpp +++ b/iokit/Kernel/IOBufferMemoryDescriptor.cpp @@ -23,9 +23,11 @@ #include #include +#include #include #include "IOKitKernelInternal.h" +#include "IOCopyMapper.h" __BEGIN_DECLS void ipc_port_release_send(ipc_port_t port); @@ -34,6 +36,12 @@ void ipc_port_release_send(ipc_port_t port); vm_map_t IOPageableMapForAddress( vm_address_t address ); __END_DECLS +/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * 
* * * * */ + +volatile ppnum_t gIOHighestAllocatedPage; + +/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ + #define super IOGeneralMemoryDescriptor OSDefineMetaClassAndStructors(IOBufferMemoryDescriptor, IOGeneralMemoryDescriptor); @@ -87,8 +95,20 @@ bool IOBufferMemoryDescriptor::initWithOptions( vm_size_t capacity, vm_offset_t alignment, task_t inTask) +{ + mach_vm_address_t physicalMask = 0; + return (initWithPhysicalMask(inTask, options, capacity, alignment, physicalMask)); +} + +bool IOBufferMemoryDescriptor::initWithPhysicalMask( + task_t inTask, + IOOptionBits options, + mach_vm_size_t capacity, + mach_vm_address_t alignment, + mach_vm_address_t physicalMask) { kern_return_t kr; + addr64_t lastIOAddr; vm_map_t vmmap = 0; IOOptionBits iomdOptions = kIOMemoryAsReference | kIOMemoryTypeVirtual; @@ -110,6 +130,12 @@ bool IOBufferMemoryDescriptor::initWithOptions( if ((inTask != kernel_task) && !(options & kIOMemoryPageable)) return false; + if (physicalMask && (alignment <= 1)) + alignment = ((physicalMask ^ PAGE_MASK) & PAGE_MASK) + 1; + + if ((options & kIOMemoryPhysicallyContiguous) && !physicalMask) + physicalMask = 0xFFFFFFFF; + _alignment = alignment; if (options & kIOMemoryPageable) { @@ -168,7 +194,7 @@ bool IOBufferMemoryDescriptor::initWithOptions( #if IOALLOCDEBUG debug_iomallocpageable_size += size; #endif - if ((NULL == inTask) && (options & kIOMemoryPageable)) + if (NULL == inTask) inTask = kernel_task; else if (inTask == kernel_task) { @@ -176,7 +202,6 @@ bool IOBufferMemoryDescriptor::initWithOptions( } else { - if( !reserved) { reserved = IONew( ExpansionData, 1 ); if( !reserved) @@ -187,23 +212,58 @@ bool IOBufferMemoryDescriptor::initWithOptions( reserved->map = vmmap; } } - else + else { - // @@@ gvdl: Need to remove this - // Buffer should never auto prepare they should be prepared explicitly - // But it never was enforced so what are you going to do? 
- iomdOptions |= kIOMemoryAutoPrepare; - - /* Allocate a wired-down buffer inside kernel space. */ - if (options & kIOMemoryPhysicallyContiguous) - _buffer = IOMallocContiguous(capacity, alignment, 0); - else if (alignment > 1) - _buffer = IOMallocAligned(capacity, alignment); + if (IOMapper::gSystem) + // assuming mapped space is 2G + lastIOAddr = (1UL << 31) - PAGE_SIZE; else - _buffer = IOMalloc(capacity); + lastIOAddr = ptoa_64(gIOHighestAllocatedPage); - if (!_buffer) - return false; + if (physicalMask && (lastIOAddr != (lastIOAddr & physicalMask))) + { + mach_vm_address_t address; + iomdOptions &= ~kIOMemoryTypeVirtual; + iomdOptions |= kIOMemoryTypePhysical; + + address = IOMallocPhysical(capacity, physicalMask); + _buffer = (void *) address; + if (!_buffer) + return false; + + if (inTask == kernel_task) + { + vmmap = kernel_map; + } + else if (NULL != inTask) + { + if( !reserved) { + reserved = IONew( ExpansionData, 1 ); + if( !reserved) + return( false ); + } + vmmap = get_task_map(inTask); + vm_map_reference(vmmap); + reserved->map = vmmap; + } + inTask = 0; + } + else + { + // Buffer shouldn't auto prepare they should be prepared explicitly + // But it never was enforced so what are you going to do? + iomdOptions |= kIOMemoryAutoPrepare; + + /* Allocate a wired-down buffer inside kernel space. 
*/ + if (options & kIOMemoryPhysicallyContiguous) + _buffer = (void *) IOKernelAllocateContiguous(capacity, alignment); + else if (alignment > 1) + _buffer = IOMallocAligned(capacity, alignment); + else + _buffer = IOMalloc(capacity); + if (!_buffer) + return false; + } } _singleRange.v.address = (vm_address_t) _buffer; @@ -213,22 +273,55 @@ bool IOBufferMemoryDescriptor::initWithOptions( inTask, iomdOptions, /* System mapper */ 0)) return false; - if (options & kIOMemoryPageable) + if (physicalMask && !IOMapper::gSystem) { - kern_return_t kr; + IOMDDMACharacteristics mdSummary; - if (vmmap) + bzero(&mdSummary, sizeof(mdSummary)); + IOReturn rtn = dmaCommandOperation( + kIOMDGetCharacteristics, + &mdSummary, sizeof(mdSummary)); + if (rtn) + return false; + + if (mdSummary.fHighestPage) { - kr = doMap(vmmap, (IOVirtualAddress *) &_buffer, kIOMapAnywhere, 0, round_page_32(capacity)); - if (KERN_SUCCESS != kr) + ppnum_t highest; + while (mdSummary.fHighestPage > (highest = gIOHighestAllocatedPage)) { - _buffer = 0; - return( false ); + if (OSCompareAndSwap(highest, mdSummary.fHighestPage, + (UInt32 *) &gIOHighestAllocatedPage)) + break; } - _singleRange.v.address = (vm_address_t) _buffer; + lastIOAddr = ptoa_64(mdSummary.fHighestPage); + } + else + lastIOAddr = ptoa_64(gIOLastPage); + + if (lastIOAddr != (lastIOAddr & physicalMask)) + { + if (kIOMemoryTypePhysical != (_flags & kIOMemoryTypeMask)) + { + // flag a retry + _physSegCount = 1; + } + return false; } } + if (vmmap) + { + kr = doMap(vmmap, (IOVirtualAddress *) &_buffer, kIOMapAnywhere, 0, capacity); + if (KERN_SUCCESS != kr) + { + _buffer = 0; + return( false ); + } + + if (kIOMemoryTypeVirtual & iomdOptions) + _singleRange.v.address = (vm_address_t) _buffer; + } + setLength(capacity); return true; @@ -243,8 +336,44 @@ IOBufferMemoryDescriptor * IOBufferMemoryDescriptor::inTaskWithOptions( IOBufferMemoryDescriptor *me = new IOBufferMemoryDescriptor; if (me && !me->initWithOptions(options, capacity, alignment, 
inTask)) { + bool retry = me->_physSegCount; me->release(); me = 0; + if (retry) + { + me = new IOBufferMemoryDescriptor; + if (me && !me->initWithOptions(options, capacity, alignment, inTask)) + { + me->release(); + me = 0; + } + } + } + return me; +} + +IOBufferMemoryDescriptor * IOBufferMemoryDescriptor::inTaskWithPhysicalMask( + task_t inTask, + IOOptionBits options, + mach_vm_size_t capacity, + mach_vm_address_t physicalMask) +{ + IOBufferMemoryDescriptor *me = new IOBufferMemoryDescriptor; + + if (me && !me->initWithPhysicalMask(inTask, options, capacity, 1, physicalMask)) + { + bool retry = me->_physSegCount; + me->release(); + me = 0; + if (retry) + { + me = new IOBufferMemoryDescriptor; + if (me && !me->initWithPhysicalMask(inTask, options, capacity, 1, physicalMask)) + { + me->release(); + me = 0; + } + } } return me; } @@ -262,13 +391,7 @@ IOBufferMemoryDescriptor * IOBufferMemoryDescriptor::withOptions( vm_size_t capacity, vm_offset_t alignment) { - IOBufferMemoryDescriptor *me = new IOBufferMemoryDescriptor; - - if (me && !me->initWithOptions(options, capacity, alignment, kernel_task)) { - me->release(); - me = 0; - } - return me; + return(IOBufferMemoryDescriptor::inTaskWithOptions(kernel_task, options, capacity, alignment)); } @@ -329,9 +452,21 @@ IOBufferMemoryDescriptor::withBytes(const void * inBytes, { IOBufferMemoryDescriptor *me = new IOBufferMemoryDescriptor; - if (me && !me->initWithBytes(inBytes, inLength, inDirection, inContiguous)){ - me->release(); - me = 0; + if (me && !me->initWithBytes(inBytes, inLength, inDirection, inContiguous)) + { + bool retry = me->_physSegCount; + me->release(); + me = 0; + if (retry) + { + me = new IOBufferMemoryDescriptor; + if (me && !me->initWithBytes(inBytes, inLength, inDirection, inContiguous)) + { + me->release(); + me = 0; + } + } + } return me; } @@ -345,11 +480,13 @@ void IOBufferMemoryDescriptor::free() { // Cache all of the relevant information on the stack for use // after we call super::free()! 
- IOOptionBits options = _options; - vm_size_t size = _capacity; - void * buffer = _buffer; - vm_map_t vmmap = 0; - vm_offset_t alignment = _alignment; + IOOptionBits flags = _flags; + IOOptionBits options = _options; + vm_size_t size = _capacity; + void * buffer = _buffer; + IOVirtualAddress source = _singleRange.v.address; + vm_map_t vmmap = 0; + vm_offset_t alignment = _alignment; if (reserved) { @@ -376,8 +513,14 @@ void IOBufferMemoryDescriptor::free() } else if (buffer) { - if (options & kIOMemoryPhysicallyContiguous) - IOFreeContiguous(buffer, size); + if (kIOMemoryTypePhysical == (flags & kIOMemoryTypeMask)) + { + if (vmmap) + vm_deallocate(vmmap, (vm_address_t) buffer, round_page_32(size)); + IOFreePhysical((mach_vm_address_t) source, size); + } + else if (options & kIOMemoryPhysicallyContiguous) + IOKernelFreeContiguous((mach_vm_address_t) buffer, size); else if (alignment > 1) IOFreeAligned(buffer, size); else @@ -437,14 +580,21 @@ void IOBufferMemoryDescriptor::setDirection(IODirection direction) bool IOBufferMemoryDescriptor::appendBytes(const void * bytes, vm_size_t withLength) { - vm_size_t actualBytesToCopy = min(withLength, _capacity - _length); + vm_size_t actualBytesToCopy = min(withLength, _capacity - _length); + IOByteCount offset; assert(_length <= _capacity); - bcopy(/* from */ bytes, (void *)(_singleRange.v.address + _length), - actualBytesToCopy); + + offset = _length; _length += actualBytesToCopy; _singleRange.v.length += actualBytesToCopy; + if (_task == kernel_task) + bcopy(/* from */ bytes, (void *)(_singleRange.v.address + offset), + actualBytesToCopy); + else + writeBytes(offset, bytes, actualBytesToCopy); + return true; } @@ -455,9 +605,13 @@ IOBufferMemoryDescriptor::appendBytes(const void * bytes, vm_size_t withLength) */ void * IOBufferMemoryDescriptor::getBytesNoCopy() { - return (void *)_singleRange.v.address; + if (kIOMemoryTypePhysical == (_flags & kIOMemoryTypeMask)) + return _buffer; + else + return (void 
*)_singleRange.v.address; } + /* * getBytesNoCopy: * @@ -466,13 +620,30 @@ void * IOBufferMemoryDescriptor::getBytesNoCopy() void * IOBufferMemoryDescriptor::getBytesNoCopy(vm_size_t start, vm_size_t withLength) { - if (start < _length && (start + withLength) <= _length) - return (void *)(_singleRange.v.address + start); + IOVirtualAddress address; + if (kIOMemoryTypePhysical == (_flags & kIOMemoryTypeMask)) + address = (IOVirtualAddress) _buffer; + else + address = _singleRange.v.address; + + if (start < _length && (start + withLength) <= _length) + return (void *)(address + start); return 0; } +/* DEPRECATED */ void * IOBufferMemoryDescriptor::getVirtualSegment(IOByteCount offset, +/* DEPRECATED */ IOByteCount * lengthOfSegment) +{ + void * bytes = getBytesNoCopy(offset, 0); + + if (bytes && lengthOfSegment) + *lengthOfSegment = _length - offset; + + return bytes; +} + OSMetaClassDefineReservedUsed(IOBufferMemoryDescriptor, 0); -OSMetaClassDefineReservedUnused(IOBufferMemoryDescriptor, 1); +OSMetaClassDefineReservedUsed(IOBufferMemoryDescriptor, 1); OSMetaClassDefineReservedUnused(IOBufferMemoryDescriptor, 2); OSMetaClassDefineReservedUnused(IOBufferMemoryDescriptor, 3); OSMetaClassDefineReservedUnused(IOBufferMemoryDescriptor, 4); diff --git a/iokit/Kernel/IOCPU.cpp b/iokit/Kernel/IOCPU.cpp index b3cd11f53..c86d5a231 100644 --- a/iokit/Kernel/IOCPU.cpp +++ b/iokit/Kernel/IOCPU.cpp @@ -221,9 +221,12 @@ bool IOCPU::setProperty(const OSSymbol *aKey, OSObject *anObject) bool IOCPU::serializeProperties(OSSerialize *serialize) const { - super::setProperty(gIOCPUStateKey, gIOCPUStateNames[_cpuState]); - - return super::serializeProperties(serialize); + bool result; + OSDictionary *dict = dictionaryWithProperties(); + dict->setObject(gIOCPUStateKey, gIOCPUStateNames[_cpuState]); + result = dict->serialize(serialize); + dict->release(); + return result; } IOReturn IOCPU::setProperties(OSObject *properties) @@ -386,8 +389,10 @@ void 
IOCPUInterruptController::setCPUInterruptProperties(IOService *service) void IOCPUInterruptController::enableCPUInterrupt(IOCPU *cpu) { - ml_install_interrupt_handler(cpu, cpu->getCPUNumber(), this, - (IOInterruptHandler)&IOCPUInterruptController::handleInterrupt, 0); + IOInterruptHandler handler = OSMemberFunctionCast( + IOInterruptHandler, this, &IOCPUInterruptController::handleInterrupt); + + ml_install_interrupt_handler(cpu, cpu->getCPUNumber(), this, handler, 0); enabledCPUs++; diff --git a/iokit/Kernel/IOCatalogue.cpp b/iokit/Kernel/IOCatalogue.cpp index 9f0552193..2d05a9b4a 100644 --- a/iokit/Kernel/IOCatalogue.cpp +++ b/iokit/Kernel/IOCatalogue.cpp @@ -1162,7 +1162,6 @@ bool IOCatalogue::addExtensionsFromArchive(OSData * mkext) return result; } - /********************************************************************* * This function clears out all references to the in-kernel linker, * frees the list of startup extensions in extensionDict, and @@ -1172,10 +1171,13 @@ bool IOCatalogue::addExtensionsFromArchive(OSData * mkext) *********************************************************************/ kern_return_t IOCatalogue::removeKernelLinker(void) { kern_return_t result = KERN_SUCCESS; - struct segment_command * segment; + struct segment_command * segmentLE, *segmentKLD; + boolean_t keepsyms = FALSE; +#if __ppc__ char * dt_segment_name; void * segment_paddress; int segment_size; +#endif /* This must be the very first thing done by this function. */ @@ -1190,6 +1192,8 @@ kern_return_t IOCatalogue::removeKernelLinker(void) { goto finish; } + PE_parse_boot_arg("keepsyms", &keepsyms); + IOLog("Jettisoning kernel linker.\n"); kernelLinkerPresent = 0; @@ -1209,25 +1213,24 @@ kern_return_t IOCatalogue::removeKernelLinker(void) { * memory so that any cross-dependencies (not that there * should be any) are handled. 
*/ - segment = getsegbyname("__KLD"); - if (!segment) { + segmentKLD = getsegbyname("__KLD"); + if (!segmentKLD) { IOLog("error removing kernel linker: can't find %s segment\n", "__KLD"); result = KERN_FAILURE; goto finish; } - OSRuntimeUnloadCPPForSegment(segment); + OSRuntimeUnloadCPPForSegment(segmentKLD); - segment = getsegbyname("__LINKEDIT"); - if (!segment) { + segmentLE = getsegbyname("__LINKEDIT"); + if (!segmentLE) { IOLog("error removing kernel linker: can't find %s segment\n", "__LINKEDIT"); result = KERN_FAILURE; goto finish; } - OSRuntimeUnloadCPPForSegment(segment); - - + OSRuntimeUnloadCPPForSegment(segmentLE); +#if __ppc__ /* Free the memory that was set up by bootx. */ dt_segment_name = "Kernel-__KLD"; @@ -1235,12 +1238,29 @@ kern_return_t IOCatalogue::removeKernelLinker(void) { IODTFreeLoaderInfo(dt_segment_name, (void *)segment_paddress, (int)segment_size); } - +#elif __i386__ + /* On x86, use the mapping data from the segment load command to + * unload KLD and LINKEDIT directly, unless the keepsyms boot-arg + * was enabled. This may invalidate any assumptions about + * "avail_start" defining the lower bound for valid physical addresses. 
+ */ + if (!keepsyms && segmentKLD->vmaddr && segmentKLD->vmsize) + ml_static_mfree(segmentKLD->vmaddr, segmentKLD->vmsize); +#else +#error arch +#endif +#if __ppc__ dt_segment_name = "Kernel-__LINKEDIT"; if (0 == IODTGetLoaderInfo(dt_segment_name, &segment_paddress, &segment_size)) { IODTFreeLoaderInfo(dt_segment_name, (void *)segment_paddress, (int)segment_size); } +#elif __i386__ + if (!keepsyms && segmentLE->vmaddr && segmentLE->vmsize) + ml_static_mfree(segmentLE->vmaddr, segmentLE->vmsize); +#else +#error arch +#endif struct section * sect; sect = getsectbyname("__PRELINK", "__symtab"); diff --git a/iokit/Kernel/IOCommandGate.cpp b/iokit/Kernel/IOCommandGate.cpp index dadd04753..37a5304c2 100644 --- a/iokit/Kernel/IOCommandGate.cpp +++ b/iokit/Kernel/IOCommandGate.cpp @@ -19,6 +19,8 @@ * * @APPLE_LICENSE_HEADER_END@ */ +#include + #include #include #include @@ -56,71 +58,98 @@ IOCommandGate::commandGate(OSObject *inOwner, Action inAction) return me; } -IOReturn IOCommandGate::runCommand(void *arg0, void *arg1, - void *arg2, void *arg3) +/* virtual */ void IOCommandGate::disable() { - IOReturn res; + if (workLoop && !workLoop->inGate()) + OSReportWithBacktrace("IOCommandGate::disable() called when not gated"); - if (!enabled) - return kIOReturnNotPermitted; + super::disable(); +} - if (!action) - return kIOReturnNoResources; +/* virtual */ void IOCommandGate::enable() +{ + if (workLoop) { + closeGate(); + super::enable(); + wakeupGate(&enabled, /* oneThread */ false); // Unblock sleeping threads + openGate(); + } +} - // closeGate is recursive so don't worry if we already hold the lock. 
- IOTimeStampConstant(IODBG_CMDQ(IOCMDQ_ACTION), - (unsigned int) action, (unsigned int) owner); +/* virtual */ void IOCommandGate::free() +{ + setWorkLoop(0); + super::free(); +} - closeGate(); - res = (*(Action) action)(owner, arg0, arg1, arg2, arg3); - openGate(); +/* virtual */ void IOCommandGate::setWorkLoop(IOWorkLoop *inWorkLoop) +{ + uintptr_t *sleepersP = (uintptr_t *) &reserved; + if (!inWorkLoop && workLoop) { // tearing down + closeGate(); + *sleepersP |= 1; + while (*sleepersP >> 1) { + thread_wakeup_with_result(&enabled, THREAD_INTERRUPTED); + sleepGate(sleepersP, THREAD_UNINT); + } + *sleepersP = 0; + openGate(); + } + else - return res; + super::setWorkLoop(inWorkLoop); +} + +IOReturn IOCommandGate::runCommand(void *arg0, void *arg1, + void *arg2, void *arg3) +{ + return runAction((Action) action, arg0, arg1, arg2, arg3); +} + +IOReturn IOCommandGate::attemptCommand(void *arg0, void *arg1, + void *arg2, void *arg3) +{ + return attemptAction((Action) action, arg0, arg1, arg2, arg3); } IOReturn IOCommandGate::runAction(Action inAction, void *arg0, void *arg1, void *arg2, void *arg3) { - IOReturn res; - - if (!enabled) - return kIOReturnNotPermitted; - if (!inAction) return kIOReturnBadArgument; IOTimeStampConstant(IODBG_CMDQ(IOCMDQ_ACTION), (unsigned int) inAction, (unsigned int) owner); - // closeGate is recursive so don't worry if we already hold the lock. + // closeGate is recursive needn't worry if we already hold the lock. closeGate(); - res = (*inAction)(owner, arg0, arg1, arg2, arg3); - openGate(); - return res; -} - -IOReturn IOCommandGate::attemptCommand(void *arg0, void *arg1, - void *arg2, void *arg3) -{ + // If the command gate is disabled and we aren't on the workloop thread + // itself then sleep until we get enabled. 
IOReturn res; + if (!workLoop->onThread()) { + while (!enabled) { + uintptr_t *sleepersP = (uintptr_t *) &reserved; - if (!enabled) - return kIOReturnNotPermitted; + *sleepersP += 2; + IOReturn res = sleepGate(&enabled, THREAD_ABORTSAFE); + *sleepersP -= 2; - if (!action) - return kIOReturnNoResources; + bool wakeupTearDown = (*sleepersP & 1); + if (res || wakeupTearDown) { + openGate(); - // Try to hold the lock if can't get return immediately. - if (!tryCloseGate()) - return kIOReturnCannotLock; + if (wakeupTearDown) + commandWakeup(sleepersP); // No further resources used - // closeGate is recursive so don't worry if we already hold the lock. - IOTimeStampConstant(IODBG_CMDQ(IOCMDQ_ACTION), - (unsigned int) action, (unsigned int) owner); + return kIOReturnAborted; + } + } + } - res = (*(Action) action)(owner, arg0, arg1, arg2, arg3); + // Must be gated and on the work loop or enabled + res = (*inAction)(owner, arg0, arg1, arg2, arg3); openGate(); return res; @@ -132,9 +161,6 @@ IOReturn IOCommandGate::attemptAction(Action inAction, { IOReturn res; - if (!enabled) - return kIOReturnNotPermitted; - if (!inAction) return kIOReturnBadArgument; @@ -142,10 +168,16 @@ IOReturn IOCommandGate::attemptAction(Action inAction, if (!tryCloseGate()) return kIOReturnCannotLock; - IOTimeStampConstant(IODBG_CMDQ(IOCMDQ_ACTION), - (unsigned int) inAction, (unsigned int) owner); + // If the command gate is disabled then sleep until we get a wakeup + if (!workLoop->onThread() && !enabled) + res = kIOReturnNotPermitted; + else { + IOTimeStampConstant(IODBG_CMDQ(IOCMDQ_ACTION), + (unsigned int) inAction, (unsigned int) owner); + + res = (*inAction)(owner, arg0, arg1, arg2, arg3); + } - res = (*inAction)(owner, arg0, arg1, arg2, arg3); openGate(); return res; diff --git a/iokit/Kernel/IOCommandPool.cpp b/iokit/Kernel/IOCommandPool.cpp index c50a61636..345b2b04c 100644 --- a/iokit/Kernel/IOCommandPool.cpp +++ b/iokit/Kernel/IOCommandPool.cpp @@ -141,9 +141,10 @@ 
IOCommandPool::getCommand(bool blockForCommand) IOReturn result = kIOReturnSuccess; IOCommand *command = 0; - result = fSerializer->runAction((IOCommandGate::Action) - &IOCommandPool::gatedGetCommand, - (void *) &command, (void *) blockForCommand); + IOCommandGate::Action func = OSMemberFunctionCast( + IOCommandGate::Action, this, &IOCommandPool::gatedGetCommand); + result = fSerializer-> + runAction(func, (void *) &command, (void *) blockForCommand); if (kIOReturnSuccess == result) return command; else @@ -180,8 +181,9 @@ gatedGetCommand(IOCommand **command, bool blockForCommand) void IOCommandPool:: returnCommand(IOCommand *command) { - (void) fSerializer->runAction((IOCommandGate::Action) - &IOCommandPool::gatedReturnCommand, (void *) command); + IOCommandGate::Action func = OSMemberFunctionCast( + IOCommandGate::Action, this, &IOCommandPool::gatedReturnCommand); + (void) fSerializer->runAction(func, (void *) command); } diff --git a/iokit/Kernel/IOCopyMapper.cpp b/iokit/Kernel/IOCopyMapper.cpp new file mode 100644 index 000000000..a808e04c4 --- /dev/null +++ b/iokit/Kernel/IOCopyMapper.cpp @@ -0,0 +1,420 @@ +/* + * Copyright (c) 2006 Apple Computer, Inc. All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * The contents of this file constitute Original Code as defined in and + * are subject to the Apple Public Source License Version 1.1 (the + * "License"). You may not use this file except in compliance with the + * License. Please obtain a copy of the License at + * http://www.apple.com/publicsource and read it before using this file. + * + * This Original Code and all software distributed under the License are + * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. 
Please see the + * License for the specific language governing rights and limitations + * under the License. + * + * @APPLE_LICENSE_HEADER_END@ + */ +// 45678901234567890123456789012345678901234567890123456789012345678901234567890 + +#include "IOCopyMapper.h" +#include + +#if 0 +#define DEBG(fmt, args...) { kprintf(fmt, ## args); } +#else +#define DEBG(fmt, args...) {} +#endif + +extern "C" { +extern ppnum_t pmap_find_phys(pmap_t pmap, addr64_t va); +extern void ml_get_bouncepool_info( + vm_offset_t *phys_addr, + vm_size_t *size); +extern unsigned int vm_lopage_max_count; +extern unsigned int vm_himemory_mode; +} + +#define super IOMapper + +OSDefineMetaClassAndStructors(IOCopyMapper, IOMapper); + +// Remember no value can be bigger than 31 bits as the sign bit indicates +// that this entry is valid to the hardware and that would be bad if it wasn't +typedef struct FreeDARTEntry { +#if __BIG_ENDIAN__ + unsigned int + /* bool */ fValid : 1, + /* bool */ fInUse : 1, // Allocated but not inserted yet + /* bool */ : 5, // Align size on nibble boundary for debugging + /* uint */ fSize : 5, + /* uint */ : 2, + /* uint */ fNext :18; // offset of FreeDARTEntry's + +#elif __LITTLE_ENDIAN__ + unsigned int + /* uint */ fNext :18, // offset of FreeDARTEntry's + /* uint */ : 2, + /* uint */ fSize : 5, + /* bool */ : 5, // Align size on nibble boundary for debugging + /* bool */ fInUse : 1, // Allocated but not inserted yet + /* bool */ fValid : 1; +#endif +#if __BIG_ENDIAN__ + unsigned int + /* uint */ :14, + /* uint */ fPrev :18; // offset of FreeDARTEntry's + +#elif __LITTLE_ENDIAN__ + unsigned int + /* uint */ fPrev :18, // offset of FreeDARTEntry's + /* uint */ :14; +#endif +} FreeDARTEntry; + +typedef struct ActiveDARTEntry { +#if __BIG_ENDIAN__ + unsigned int + /* bool */ fValid : 1, // Must be set to one if valid + /* uint */ fPPNum :31; // ppnum_t page of translation +#define ACTIVEDARTENTRY(page) { true, page } + +#elif __LITTLE_ENDIAN__ + unsigned int + /* uint */ 
fPPNum :31, // ppnum_t page of translation + /* bool */ fValid : 1; // Must be set to one if valid +#define ACTIVEDARTENTRY(page) { page, true } + +#endif +}; + +#define kActivePerFree (sizeof(freeDART[0]) / sizeof(ActiveDARTEntry)) + +static SYSCTL_UINT(_kern, OID_AUTO, copyregionmax, + CTLFLAG_RD | CTLFLAG_NOAUTO | CTLFLAG_KERN, + NULL, 0, ""); + +static SYSCTL_UINT(_kern, OID_AUTO, lowpagemax, + CTLFLAG_RD | CTLFLAG_NOAUTO | CTLFLAG_KERN, + &vm_lopage_max_count, 0, ""); + +static SYSCTL_UINT(_kern, OID_AUTO, himemorymode, + CTLFLAG_RD | CTLFLAG_NOAUTO | CTLFLAG_KERN, + &vm_himemory_mode, 0, ""); + +bool IOCopyMapper::initHardware(IOService * provider) +{ + UInt32 dartSizePages = 0; + + vm_offset_t phys_addr; + vm_size_t size; + ml_get_bouncepool_info(&phys_addr, &size); + + if (!size) + return (false); + + fBufferPage = atop_32(phys_addr); + dartSizePages = (atop_32(size) + kTransPerPage - 1) / kTransPerPage; + + fTableLock = IOLockAlloc(); + + if (!fTableLock) + return false; + + if (!allocTable(dartSizePages * kMapperPage)) + return false; + + UInt32 canMapPages = dartSizePages * kTransPerPage; + fMapperRegionSize = canMapPages; + for (fNumZones = 0; canMapPages; fNumZones++) + canMapPages >>= 1; + fNumZones -= 3; // correct for overshoot and minumum 16K pages allocation + + invalidateDART(0, fMapperRegionSize); + + breakUp(0, fNumZones, 0); + ((FreeDARTEntry *) fTable)->fInUse = true; + + fMapperRegionUsed = kMinZoneSize; + fMapperRegionMaxUsed = fMapperRegionUsed; + + sysctl__kern_copyregionmax.oid_arg1 = &fMapperRegionMaxUsed; + + sysctl_register_oid(&sysctl__kern_copyregionmax); + sysctl_register_oid(&sysctl__kern_lowpagemax); + sysctl_register_oid(&sysctl__kern_himemorymode); + + fDummyPage = IOMallocAligned(0x1000, 0x1000); + fDummyPageNumber = + pmap_find_phys(kernel_pmap, (addr64_t) (uintptr_t) fDummyPage); + + return true; +} + +void IOCopyMapper::free() +{ + if (fDummyPage) { + IOFreeAligned(fDummyPage, 0x1000); + fDummyPage = 0; + fDummyPageNumber = 
0; + } + + if (fTableLock) { + IOLockFree(fTableLock); + fTableLock = 0; + } + + super::free(); +} + +// Must be called while locked +void IOCopyMapper::breakUp(unsigned startIndex, unsigned endIndex, unsigned freeInd) +{ + unsigned int zoneSize; + FreeDARTEntry *freeDART = (FreeDARTEntry *) fTable; + + do { + // Need to break up bigger blocks of memory till we get one in our + // desired zone. + endIndex--; + zoneSize = (kMinZoneSize/2 << endIndex); + ppnum_t tail = freeInd + zoneSize; + + DEBG("breakup z %d start %x tail %x\n", endIndex, freeInd, tail); + + // By definition free lists must be empty + fFreeLists[endIndex] = tail; + freeDART[tail].fSize = endIndex; + freeDART[tail].fNext = freeDART[tail].fPrev = 0; + } while (endIndex != startIndex); + freeDART[freeInd].fSize = endIndex; +} + +// Zero is never a valid page to return +ppnum_t IOCopyMapper::iovmAlloc(IOItemCount pages) +{ + unsigned int zone, zoneSize, z, cnt; + ppnum_t next, ret = 0; + FreeDARTEntry *freeDART = (FreeDARTEntry *) fTable; + + // Can't alloc anything of less than minumum + if (pages < kMinZoneSize) + pages = kMinZoneSize; + + // Can't alloc anything bigger than 1/2 table + if (pages >= fMapperRegionSize/2) + { + panic("iovmAlloc 0x%x", pages); + return 0; + } + + // Find the appropriate zone for this allocation + for (zone = 0, zoneSize = kMinZoneSize; pages > zoneSize; zone++) + zoneSize <<= 1; + + { + IOLockLock(fTableLock); + + for (;;) { + for (z = zone; z < fNumZones; z++) { + if ( (ret = fFreeLists[z]) ) + break; + } + if (ret) + break; + + fFreeSleepers++; + IOLockSleep(fTableLock, fFreeLists, THREAD_UNINT); + fFreeSleepers--; + } + + // If we didn't find a entry in our size then break up the free block + // that we did find. 
+ if (zone != z) + { + DEBG("breakup %d, %d, 0x%x\n", zone, z, ret); + breakUp(zone, z, ret); + } + + freeDART[ret].fInUse = true; // Mark entry as In Use + next = freeDART[ret].fNext; + DEBG("va: 0x%x, %d, ret %x next %x\n", (ret * kActivePerFree) + fBufferPage, pages, ret, next); + + fFreeLists[z] = next; + if (next) + freeDART[next].fPrev = 0; + + // ret is free list offset not page offset; + ret *= kActivePerFree; + + ActiveDARTEntry pageEntry = ACTIVEDARTENTRY(fDummyPageNumber); + for (cnt = 0; cnt < pages; cnt++) { + ActiveDARTEntry *activeDART = &fMappings[ret + cnt]; + *activeDART = pageEntry; + } + + fMapperRegionUsed += pages; + if (fMapperRegionUsed > fMapperRegionMaxUsed) + fMapperRegionMaxUsed = fMapperRegionUsed; + + IOLockUnlock(fTableLock); + } + + if (ret) + ret += fBufferPage; + + return ret; +} + + +void IOCopyMapper::invalidateDART(ppnum_t pnum, IOItemCount size) +{ + bzero((void *) &fMappings[pnum], size * sizeof(fMappings[0])); +} + +void IOCopyMapper::iovmFree(ppnum_t addr, IOItemCount pages) +{ + unsigned int zone, zoneSize, z; + FreeDARTEntry *freeDART = (FreeDARTEntry *) fTable; + + if (addr < fBufferPage) + IOPanic("addr < fBufferPage"); + addr -= fBufferPage; + + // Can't free anything of less than minumum + if (pages < kMinZoneSize) + pages = kMinZoneSize; + + // Can't free anything bigger than 1/2 table + if (pages >= fMapperRegionSize/2) + return; + + // Find the appropriate zone for this allocation + for (zone = 0, zoneSize = kMinZoneSize; pages > zoneSize; zone++) + zoneSize <<= 1; + + // Grab lock that protects the dart + IOLockLock(fTableLock); + + invalidateDART(addr, pages); + + addr /= kActivePerFree; + + // We are freeing a block, check to see if pairs are available for + // coalescing. We will walk up the entire chain if we can. 
+ for (z = zone; z < fNumZones; z++) { + ppnum_t pair = addr ^ (kMinZoneSize/2 << z); // Find pair address + if (freeDART[pair].fValid || freeDART[pair].fInUse || (freeDART[pair].fSize != z)) + break; + + // The paired alloc entry is free if we are here + ppnum_t next = freeDART[pair].fNext; + ppnum_t prev = freeDART[pair].fPrev; + + // Remove the pair from its freeList + if (prev) + freeDART[prev].fNext = next; + else + fFreeLists[z] = next; + + if (next) + freeDART[next].fPrev = prev; + + // Sort the addr and the pair + if (addr > pair) + addr = pair; + } + + DEBG("vf: 0x%x, %d, z %d, head %x, new %x\n", addr * kActivePerFree + fBufferPage, pages, z, fFreeLists[z], addr); + + // Add the allocation entry into it's free list and re-init it + freeDART[addr].fSize = z; + freeDART[addr].fNext = fFreeLists[z]; + if (fFreeLists[z]) + freeDART[fFreeLists[z]].fPrev = addr; + freeDART[addr].fPrev = 0; + fFreeLists[z] = addr; + + fMapperRegionUsed -= pages; + + if (fFreeSleepers) + IOLockWakeup(fTableLock, fFreeLists, /* oneThread */ false); + + IOLockUnlock(fTableLock); +} + +addr64_t IOCopyMapper::mapAddr(IOPhysicalAddress addr) +{ + if (addr < ptoa_32(fBufferPage)) + { + return (addr64_t) addr; // Not mapped by us anyway + } + + addr -= ptoa_32(fBufferPage); + if (addr >= ptoa_32(fMapperRegionSize)) + { + return (addr64_t) addr; // Not mapped by us anyway + } + else + { + ActiveDARTEntry *activeDART = (ActiveDARTEntry *) fTable; + UInt offset = addr & PAGE_MASK; + + ActiveDARTEntry mappedPage = activeDART[atop_32(addr)]; + if (mappedPage.fValid) + { + return (ptoa_64(mappedPage.fPPNum) | offset); + } + + panic("%s::mapAddr(0x%08lx) not mapped for I/O\n", getName(), addr); + return 0; + } +} + +void IOCopyMapper::iovmInsert(ppnum_t addr, IOItemCount offset, ppnum_t page) +{ + addr -= fBufferPage; + addr += offset; // Add the offset page to the base address + + ActiveDARTEntry *activeDART = &fMappings[addr]; + ActiveDARTEntry entry = ACTIVEDARTENTRY(page); + *activeDART = 
entry; +} + +void IOCopyMapper::iovmInsert(ppnum_t addr, IOItemCount offset, + ppnum_t *pageList, IOItemCount pageCount) +{ + addr -= fBufferPage; + addr += offset; // Add the offset page to the base address + + IOItemCount i; + ActiveDARTEntry *activeDART = &fMappings[addr]; + + for (i = 0; i < pageCount; i++) + { + ActiveDARTEntry entry = ACTIVEDARTENTRY(pageList[i]); + activeDART[i] = entry; + } +} + +void IOCopyMapper::iovmInsert(ppnum_t addr, IOItemCount offset, + upl_page_info_t *pageList, IOItemCount pageCount) +{ + addr -= fBufferPage; + addr += offset; // Add the offset page to the base address + + IOItemCount i; + ActiveDARTEntry *activeDART = &fMappings[addr]; + + for (i = 0; i < pageCount; i++) + { + ActiveDARTEntry entry = ACTIVEDARTENTRY(pageList[i].phys_addr); + activeDART[i] = entry; + } +} + + diff --git a/iokit/Kernel/IOCopyMapper.h b/iokit/Kernel/IOCopyMapper.h new file mode 100644 index 000000000..e2a5f5df7 --- /dev/null +++ b/iokit/Kernel/IOCopyMapper.h @@ -0,0 +1,83 @@ +/* + * Copyright (c) 2006 Apple Computer, Inc. All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * The contents of this file constitute Original Code as defined in and + * are subject to the Apple Public Source License Version 1.1 (the + * "License"). You may not use this file except in compliance with the + * License. Please obtain a copy of the License at + * http://www.apple.com/publicsource and read it before using this file. + * + * This Original Code and all software distributed under the License are + * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the + * License for the specific language governing rights and limitations + * under the License. 
+ * + * @APPLE_LICENSE_HEADER_END@ + */ +// 45678901234567890123456789012345678901234567890123456789012345678901234567890 + +#include + +#include +#include +#include +#include + +// General constants about all VART/DART style Address Re-Mapping Tables +#define kMapperPage (4 * 1024) +#define kTransPerPage (kMapperPage / sizeof(ppnum_t)) + +#define kMinZoneSize 4 // Minimum Zone size in pages +#define kMaxNumZones (31 - 14) // 31 bit mapped in 16K super pages + +class IOCopyMapper : public IOMapper +{ + OSDeclareDefaultStructors(IOCopyMapper); + +// alias the fTable variable into our mappings table +#define fMappings ((ActiveDARTEntry *) super::fTable) + +private: + + UInt32 fFreeLists[kMaxNumZones]; + + IOLock *fTableLock; + + void *fDummyPage; + + UInt32 fNumZones; + UInt32 fMapperRegionSize; + UInt32 fMapperRegionUsed; + UInt32 fMapperRegionMaxUsed; + UInt32 fFreeSleepers; + ppnum_t fDummyPageNumber; + ppnum_t fBufferPage; + + // Internal functions + + void breakUp(unsigned start, unsigned end, unsigned freeInd); + void invalidateDART(ppnum_t pnum, IOItemCount size); + void tlbInvalidate(ppnum_t pnum, IOItemCount size); + + virtual void free(); + + virtual bool initHardware(IOService * provider); +public: + virtual ppnum_t iovmAlloc(IOItemCount pages); + virtual void iovmFree(ppnum_t addr, IOItemCount pages); + + virtual void iovmInsert(ppnum_t addr, IOItemCount offset, ppnum_t page); + virtual void iovmInsert(ppnum_t addr, IOItemCount offset, + ppnum_t *pageList, IOItemCount pageCount); + virtual void iovmInsert(ppnum_t addr, IOItemCount offset, + upl_page_info_t *pageList, IOItemCount pageCount); + + virtual addr64_t mapAddr(IOPhysicalAddress addr); +}; + +extern IOCopyMapper * gIOCopyMapper; diff --git a/iokit/Kernel/IODMACommand.cpp b/iokit/Kernel/IODMACommand.cpp new file mode 100644 index 000000000..2b79fb980 --- /dev/null +++ b/iokit/Kernel/IODMACommand.cpp @@ -0,0 +1,964 @@ +/* + * Copyright (c) 2005 Apple Computer, Inc. All rights reserved. 
+ * + * @APPLE_LICENSE_HEADER_START@ + * + * The contents of this file constitute Original Code as defined in and + * are subject to the Apple Public Source License Version 1.1 (the + * "License"). You may not use this file except in compliance with the + * License. Please obtain a copy of the License at + * http://www.apple.com/publicsource and read it before using this file. + * + * This Original Code and all software distributed under the License are + * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the + * License for the specific language governing rights and limitations + * under the License. + * + * @APPLE_LICENSE_HEADER_END@ + */ + +#include + +#include +#include + +#include +#include +#include +#include +#include +#include + +#include "IOKitKernelInternal.h" +#include "IOCopyMapper.h" + +#define MAPTYPE(type) ((UInt) (type) & kTypeMask) +#define IS_MAPPED(type) (MAPTYPE(type) == kMapped) +#define IS_BYPASSED(type) (MAPTYPE(type) == kBypassed) +#define IS_NONCOHERENT(type) (MAPTYPE(type) == kNonCoherent) + + +static bool gIOEnableCopyMapper = true; + +enum +{ + kWalkSyncIn = 0x01, // bounce -> md + kWalkSyncOut = 0x02, // bounce <- md + kWalkSyncAlways = 0x04, + kWalkPreflight = 0x08, + kWalkDoubleBuffer = 0x10, + kWalkPrepare = 0x20, + kWalkComplete = 0x40, + kWalkClient = 0x80 +}; + +struct ExpansionData +{ + IOMDDMAWalkSegmentState fState; + IOMDDMACharacteristics fMDSummary; + + UInt64 fPreparedOffset; + UInt64 fPreparedLength; + + UInt8 fCursor; + UInt8 fCheckAddressing; + UInt8 fIterateOnly; + UInt8 fMisaligned; + UInt8 fCopyContig; + UInt8 fPrepared; + UInt8 fDoubleBuffer; + UInt8 __pad[1]; + + ppnum_t fCopyPageAlloc; + ppnum_t fCopyPageCount; + addr64_t fCopyNext; + + class IOBufferMemoryDescriptor * fCopyMD; +}; 
+typedef ExpansionData IODMACommandInternal; + +#define fInternalState reserved +#define fState reserved->fState +#define fMDSummary reserved->fMDSummary + + +#if 1 +// no direction => OutIn +#define SHOULD_COPY_DIR(op, direction) \ + ((kIODirectionNone == (direction)) \ + || (kWalkSyncAlways & (op)) \ + || (((kWalkSyncIn & (op)) ? kIODirectionIn : kIODirectionOut) \ + & (direction))) + +#else +#define SHOULD_COPY_DIR(state, direction) (true) +#endif + +#if 0 +#define DEBG(fmt, args...) { kprintf(fmt, ## args); } +#else +#define DEBG(fmt, args...) {} +#endif + + +/**************************** class IODMACommand ***************************/ + +#undef super +#define super OSObject +OSDefineMetaClassAndStructors(IODMACommand, IOCommand); + +OSMetaClassDefineReservedUnused(IODMACommand, 0); +OSMetaClassDefineReservedUnused(IODMACommand, 1); +OSMetaClassDefineReservedUnused(IODMACommand, 2); +OSMetaClassDefineReservedUnused(IODMACommand, 3); +OSMetaClassDefineReservedUnused(IODMACommand, 4); +OSMetaClassDefineReservedUnused(IODMACommand, 5); +OSMetaClassDefineReservedUnused(IODMACommand, 6); +OSMetaClassDefineReservedUnused(IODMACommand, 7); +OSMetaClassDefineReservedUnused(IODMACommand, 8); +OSMetaClassDefineReservedUnused(IODMACommand, 9); +OSMetaClassDefineReservedUnused(IODMACommand, 10); +OSMetaClassDefineReservedUnused(IODMACommand, 11); +OSMetaClassDefineReservedUnused(IODMACommand, 12); +OSMetaClassDefineReservedUnused(IODMACommand, 13); +OSMetaClassDefineReservedUnused(IODMACommand, 14); +OSMetaClassDefineReservedUnused(IODMACommand, 15); + +IODMACommand * +IODMACommand::withSpecification(SegmentFunction outSegFunc, + UInt8 numAddressBits, + UInt64 maxSegmentSize, + MappingOptions mappingOptions, + UInt64 maxTransferSize, + UInt32 alignment, + IOMapper *mapper, + void *refCon) +{ + IODMACommand * me = new IODMACommand; + + if (me && !me->initWithSpecification(outSegFunc, + numAddressBits, maxSegmentSize, + mappingOptions, maxTransferSize, + alignment, mapper, 
refCon)) + { + me->release(); + return 0; + }; + + return me; +} + +IODMACommand * +IODMACommand::cloneCommand(void *refCon) +{ + return withSpecification(fOutSeg, fNumAddressBits, fMaxSegmentSize, + fMappingOptions, fMaxTransferSize, fAlignMask + 1, fMapper, refCon); +} + +#define kLastOutputFunction ((SegmentFunction) kLastOutputFunction) + +bool +IODMACommand::initWithSpecification(SegmentFunction outSegFunc, + UInt8 numAddressBits, + UInt64 maxSegmentSize, + MappingOptions mappingOptions, + UInt64 maxTransferSize, + UInt32 alignment, + IOMapper *mapper, + void *refCon) +{ + if (!super::init() || !outSegFunc || !numAddressBits) + return false; + + bool is32Bit = (OutputHost32 == outSegFunc || OutputBig32 == outSegFunc + || OutputLittle32 == outSegFunc); + if (is32Bit) + { + if (!numAddressBits) + numAddressBits = 32; + else if (numAddressBits > 32) + return false; // Wrong output function for bits + } + + if (numAddressBits && (numAddressBits < PAGE_SHIFT)) + return false; + + if (!maxSegmentSize) + maxSegmentSize--; // Set Max segment to -1 + if (!maxTransferSize) + maxTransferSize--; // Set Max transfer to -1 + + if (!mapper) + { + IOMapper::checkForSystemMapper(); + mapper = IOMapper::gSystem; + } + + fNumSegments = 0; + fBypassMask = 0; + fOutSeg = outSegFunc; + fNumAddressBits = numAddressBits; + fMaxSegmentSize = maxSegmentSize; + fMappingOptions = mappingOptions; + fMaxTransferSize = maxTransferSize; + if (!alignment) + alignment = 1; + fAlignMask = alignment - 1; + fMapper = mapper; + fRefCon = refCon; + + switch (MAPTYPE(mappingOptions)) + { + case kMapped: break; + case kNonCoherent: fMapper = 0; break; + case kBypassed: + if (mapper && !mapper->getBypassMask(&fBypassMask)) + return false; + break; + default: + return false; + }; + + reserved = IONew(ExpansionData, 1); + if (!reserved) + return false; + bzero(reserved, sizeof(ExpansionData)); + + fInternalState->fIterateOnly = (0 != (kIterateOnly & mappingOptions)); + + return true; +} + +void 
+IODMACommand::free() +{ + if (reserved) + IODelete(reserved, ExpansionData, 1); + + super::free(); +} + +IOReturn +IODMACommand::setMemoryDescriptor(const IOMemoryDescriptor *mem, bool autoPrepare) +{ + if (mem == fMemory) + { + if (!autoPrepare) + { + while (fActive) + complete(); + } + return kIOReturnSuccess; + } + + if (fMemory) { + // As we are almost certainly being called from a work loop thread + // if fActive is true it is probably not a good time to potentially + // block. Just test for it and return an error + if (fActive) + return kIOReturnBusy; + clearMemoryDescriptor(); + }; + + if (mem) { + bzero(&fMDSummary, sizeof(fMDSummary)); + IOReturn rtn = mem->dmaCommandOperation( + kIOMDGetCharacteristics, + &fMDSummary, sizeof(fMDSummary)); + if (rtn) + return rtn; + + ppnum_t highPage = fMDSummary.fHighestPage ? fMDSummary.fHighestPage : gIOLastPage; + + if ((kMapped == MAPTYPE(fMappingOptions)) + && fMapper + && (!fNumAddressBits || (fNumAddressBits >= 31))) + // assuming mapped space is 2G + fInternalState->fCheckAddressing = false; + else + fInternalState->fCheckAddressing = (fNumAddressBits && (highPage >= (1UL << (fNumAddressBits - PAGE_SHIFT)))); + + mem->retain(); + fMemory = mem; + + if (autoPrepare) + return prepare(); + }; + + return kIOReturnSuccess; +} + +IOReturn +IODMACommand::clearMemoryDescriptor(bool autoComplete) +{ + if (fActive && !autoComplete) + return (kIOReturnNotReady); + + if (fMemory) { + while (fActive) + complete(); + fMemory->release(); + fMemory = 0; + } + + return (kIOReturnSuccess); +} + +const IOMemoryDescriptor * +IODMACommand::getMemoryDescriptor() const +{ + return fMemory; +} + + +IOReturn +IODMACommand::segmentOp( + void *reference, + IODMACommand *target, + Segment64 segment, + void *segments, + UInt32 segmentIndex) +{ + IOOptionBits op = (IOOptionBits) reference; + addr64_t maxPhys, address; + addr64_t remapAddr = 0; + uint64_t length; + uint32_t numPages; + + IODMACommandInternal * state = target->reserved; + + if 
(target->fNumAddressBits && (target->fNumAddressBits < 64)) + maxPhys = (1ULL << target->fNumAddressBits); + else + maxPhys = 0; + maxPhys--; + + address = segment.fIOVMAddr; + length = segment.fLength; + + assert(address); + assert(length); + + if (!state->fMisaligned) + { + state->fMisaligned |= (0 != (target->fAlignMask & address)); + if (state->fMisaligned) DEBG("misaligned %qx:%qx, %lx\n", address, length, target->fAlignMask); + } + + if (state->fMisaligned && (kWalkPreflight & op)) + return (kIOReturnNotAligned); + + if (!state->fDoubleBuffer) + { + if ((address + length - 1) <= maxPhys) + { + length = 0; + } + else if (address <= maxPhys) + { + DEBG("tail %qx, %qx", address, length); + length = (address + length - maxPhys - 1); + address = maxPhys + 1; + DEBG("-> %qx, %qx\n", address, length); + } + } + + if (!length) + return (kIOReturnSuccess); + + numPages = atop_64(round_page_64(length)); + remapAddr = state->fCopyNext; + + if (kWalkPreflight & op) + { + state->fCopyPageCount += numPages; + } + else + { + if (kWalkPrepare & op) + { + for (IOItemCount idx = 0; idx < numPages; idx++) + gIOCopyMapper->iovmInsert(atop_64(remapAddr), idx, atop_64(address) + idx); + } + if (state->fDoubleBuffer) + state->fCopyNext += length; + else + { + state->fCopyNext += round_page(length); + remapAddr += (address & PAGE_MASK); + } + + if (SHOULD_COPY_DIR(op, target->fMDSummary.fDirection)) + { + DEBG("cpv: 0x%qx %s 0x%qx, 0x%qx, 0x%02lx\n", remapAddr, + (kWalkSyncIn & op) ? 
"->" : "<-", + address, length, op); + if (kWalkSyncIn & op) + { // cppvNoModSnk + copypv(remapAddr, address, length, + cppvPsnk | cppvFsnk | cppvPsrc | cppvNoRefSrc ); + } + else + { + copypv(address, remapAddr, length, + cppvPsnk | cppvFsnk | cppvPsrc | cppvNoRefSrc ); + } + } + } + + return kIOReturnSuccess; +} + +IOReturn +IODMACommand::walkAll(UInt8 op) +{ + IODMACommandInternal * state = fInternalState; + + IOReturn ret = kIOReturnSuccess; + UInt32 numSegments; + UInt64 offset; + + if (gIOEnableCopyMapper && (kWalkPreflight & op)) + { + state->fCopyContig = false; + state->fMisaligned = false; + state->fDoubleBuffer = false; + state->fPrepared = false; + state->fCopyNext = 0; + state->fCopyPageAlloc = 0; + state->fCopyPageCount = 0; + state->fCopyMD = 0; + + if (!(kWalkDoubleBuffer & op)) + { + offset = 0; + numSegments = 0-1; + ret = genIOVMSegments(segmentOp, (void *) op, &offset, state, &numSegments); + } + + op &= ~kWalkPreflight; + + state->fDoubleBuffer = (state->fMisaligned || (kWalkDoubleBuffer & op)); + if (state->fDoubleBuffer) + state->fCopyPageCount = atop_64(round_page(state->fPreparedLength)); + + if (state->fCopyPageCount) + { + IOMapper * mapper; + ppnum_t mapBase = 0; + + DEBG("preflight fCopyPageCount %d\n", state->fCopyPageCount); + + mapper = gIOCopyMapper; + if (mapper) + mapBase = mapper->iovmAlloc(state->fCopyPageCount); + if (mapBase) + { + state->fCopyPageAlloc = mapBase; + if (state->fCopyPageAlloc && state->fDoubleBuffer) + { + DEBG("contig copy map\n"); + state->fCopyContig = true; + } + + state->fCopyNext = ptoa_64(state->fCopyPageAlloc); + offset = 0; + numSegments = 0-1; + ret = genIOVMSegments(segmentOp, (void *) op, &offset, state, &numSegments); + state->fPrepared = true; + op &= ~(kWalkSyncIn | kWalkSyncOut); + } + else + { + DEBG("alloc IOBMD\n"); + state->fCopyMD = IOBufferMemoryDescriptor::withOptions( + fMDSummary.fDirection, state->fPreparedLength, page_size); + + if (state->fCopyMD) + { + ret = kIOReturnSuccess; + 
state->fPrepared = true; + } + else + { + DEBG("IODMACommand !iovmAlloc"); + return (kIOReturnNoResources); + } + } + } + } + + if (gIOEnableCopyMapper && state->fPrepared && ((kWalkSyncIn | kWalkSyncOut) & op)) + { + if (state->fCopyPageCount) + { + DEBG("sync fCopyPageCount %d\n", state->fCopyPageCount); + + if (state->fCopyPageAlloc) + { + state->fCopyNext = ptoa_64(state->fCopyPageAlloc); + offset = 0; + numSegments = 0-1; + ret = genIOVMSegments(segmentOp, (void *) op, &offset, state, &numSegments); + } + else if (state->fCopyMD) + { + DEBG("sync IOBMD\n"); + + if (SHOULD_COPY_DIR(op, fMDSummary.fDirection)) + { + IOMemoryDescriptor *poMD = const_cast(fMemory); + + IOByteCount bytes; + + if (kWalkSyncIn & op) + bytes = poMD->writeBytes(state->fPreparedOffset, + state->fCopyMD->getBytesNoCopy(), + state->fPreparedLength); + else + bytes = poMD->readBytes(state->fPreparedOffset, + state->fCopyMD->getBytesNoCopy(), + state->fPreparedLength); + DEBG("fCopyMD %s %lx bytes\n", (kWalkSyncIn & op) ? "wrote" : "read", bytes); + ret = (bytes == state->fPreparedLength) ? 
kIOReturnSuccess : kIOReturnUnderrun; + } + else + ret = kIOReturnSuccess; + } + } + } + + if (kWalkComplete & op) + { + if (state->fCopyPageAlloc) + { + gIOCopyMapper->iovmFree(state->fCopyPageAlloc, state->fCopyPageCount); + state->fCopyPageAlloc = 0; + state->fCopyPageCount = 0; + } + if (state->fCopyMD) + { + state->fCopyMD->release(); + state->fCopyMD = 0; + } + + state->fPrepared = false; + } + return (ret); +} + +IOReturn +IODMACommand::prepare(UInt64 offset, UInt64 length, bool flushCache, bool synchronize) +{ + IODMACommandInternal * state = fInternalState; + IOReturn ret = kIOReturnSuccess; + + if (!length) + length = fMDSummary.fLength; + + if (length > fMaxTransferSize) + return kIOReturnNoSpace; + +#if 0 + if (IS_NONCOHERENT(mappingOptions) && flushCache) { + IOMemoryDescriptor *poMD = const_cast(fMemory); + + poMD->performOperation(kIOMemoryIncoherentIOStore, 0, fMDSummary.fLength); + } +#endif + if (fActive++) + { + if ((state->fPreparedOffset != offset) + || (state->fPreparedLength != length)) + ret = kIOReturnNotReady; + } + else + { + state->fPreparedOffset = offset; + state->fPreparedLength = length; + + state->fCopyContig = false; + state->fMisaligned = false; + state->fDoubleBuffer = false; + state->fPrepared = false; + state->fCopyNext = 0; + state->fCopyPageAlloc = 0; + state->fCopyPageCount = 0; + state->fCopyMD = 0; + + state->fCursor = state->fIterateOnly + || (!state->fCheckAddressing + && (!fAlignMask + || ((fMDSummary.fPageAlign & (1 << 31)) && (0 == (fMDSummary.fPageAlign & fAlignMask))))); + if (!state->fCursor) + { + IOOptionBits op = kWalkPrepare | kWalkPreflight; + if (synchronize) + op |= kWalkSyncOut; + ret = walkAll(op); + } + if (kIOReturnSuccess == ret) + state->fPrepared = true; + } + return ret; +} + +IOReturn +IODMACommand::complete(bool invalidateCache, bool synchronize) +{ + IODMACommandInternal * state = fInternalState; + IOReturn ret = kIOReturnSuccess; + + if (fActive < 1) + return kIOReturnNotReady; + + if 
(!--fActive) + { + if (!state->fCursor) + { + IOOptionBits op = kWalkComplete; + if (synchronize) + op |= kWalkSyncIn; + ret = walkAll(op); + } + state->fPrepared = false; + +#if 0 + if (IS_NONCOHERENT(fMappingOptions) && invalidateCache) + { + // XXX gvdl: need invalidate before Chardonnay ships + IOMemoryDescriptor *poMD = const_cast(fMemory); + + poMD->performOperation(kIOMemoryIncoherentIOInvalidate, 0, fMDSummary.fLength); + } +#endif + } + + return ret; +} + +IOReturn +IODMACommand::synchronize(IOOptionBits options) +{ + IODMACommandInternal * state = fInternalState; + IOReturn ret = kIOReturnSuccess; + IOOptionBits op; + + if (kIODirectionOutIn == (kIODirectionOutIn & options)) + return kIOReturnBadArgument; + + if (fActive < 1) + return kIOReturnNotReady; + + op = 0; + if (kForceDoubleBuffer & options) + { + if (state->fDoubleBuffer) + return kIOReturnSuccess; + if (state->fCursor) + state->fCursor = false; + else + ret = walkAll(kWalkComplete); + + op |= kWalkPrepare | kWalkPreflight | kWalkDoubleBuffer; + } + else if (state->fCursor) + return kIOReturnSuccess; + + if (kIODirectionIn & options) + op |= kWalkSyncIn | kWalkSyncAlways; + else if (kIODirectionOut & options) + op |= kWalkSyncOut | kWalkSyncAlways; + + ret = walkAll(op); + + return ret; +} + +IOReturn +IODMACommand::genIOVMSegments(UInt64 *offsetP, + void *segmentsP, + UInt32 *numSegmentsP) +{ + return (genIOVMSegments(clientOutputSegment, (void *) kWalkClient, offsetP, segmentsP, numSegmentsP)); +} + +IOReturn +IODMACommand::genIOVMSegments(InternalSegmentFunction outSegFunc, + void *reference, + UInt64 *offsetP, + void *segmentsP, + UInt32 *numSegmentsP) +{ + IOOptionBits op = (IOOptionBits) reference; + IODMACommandInternal * internalState = fInternalState; + IOOptionBits mdOp = kIOMDWalkSegments; + IOReturn ret = kIOReturnSuccess; + + if (!(kWalkComplete & op) && !fActive) + return kIOReturnNotReady; + + if (!offsetP || !segmentsP || !numSegmentsP || !*numSegmentsP) + return 
kIOReturnBadArgument; + + IOMDDMAWalkSegmentArgs *state = + (IOMDDMAWalkSegmentArgs *) fState; + + UInt64 offset = *offsetP + internalState->fPreparedOffset; + UInt64 memLength = internalState->fPreparedOffset + internalState->fPreparedLength; + + if (offset >= memLength) + return kIOReturnOverrun; + + if (!offset || offset != state->fOffset) { + state->fOffset = 0; + state->fIOVMAddr = 0; + state->fMapped = (IS_MAPPED(fMappingOptions) && fMapper); + mdOp = kIOMDFirstSegment; + }; + + UInt64 bypassMask = fBypassMask; + UInt32 segIndex = 0; + UInt32 numSegments = *numSegmentsP; + Segment64 curSeg = { 0, 0 }; + addr64_t maxPhys; + + if (fNumAddressBits && (fNumAddressBits < 64)) + maxPhys = (1ULL << fNumAddressBits); + else + maxPhys = 0; + maxPhys--; + + while ((state->fIOVMAddr) || state->fOffset < memLength) + { + if (!state->fIOVMAddr) { + + IOReturn rtn; + + state->fOffset = offset; + state->fLength = memLength - offset; + + if (internalState->fCopyContig && (kWalkClient & op)) + { + state->fIOVMAddr = ptoa_64(internalState->fCopyPageAlloc) + + offset - internalState->fPreparedOffset; + rtn = kIOReturnSuccess; + } + else + { + const IOMemoryDescriptor * memory = + internalState->fCopyMD ? 
internalState->fCopyMD : fMemory; + rtn = memory->dmaCommandOperation(mdOp, fState, sizeof(fState)); + mdOp = kIOMDWalkSegments; + } + + if (rtn == kIOReturnSuccess) { + assert(state->fIOVMAddr); + assert(state->fLength); + } + else if (rtn == kIOReturnOverrun) + state->fIOVMAddr = state->fLength = 0; // At end + else + return rtn; + }; + + if (!curSeg.fIOVMAddr) { + UInt64 length = state->fLength; + + offset += length; + curSeg.fIOVMAddr = state->fIOVMAddr | bypassMask; + curSeg.fLength = length; + state->fIOVMAddr = 0; + } + else if ((curSeg.fIOVMAddr + curSeg.fLength == state->fIOVMAddr)) { + UInt64 length = state->fLength; + offset += length; + curSeg.fLength += length; + state->fIOVMAddr = 0; + }; + + + if (!state->fIOVMAddr) + { + if (kWalkClient & op) + { + if ((curSeg.fIOVMAddr + curSeg.fLength - 1) > maxPhys) + { + if (internalState->fCursor) + { + curSeg.fIOVMAddr = 0; + ret = kIOReturnMessageTooLarge; + break; + } + else if (curSeg.fIOVMAddr <= maxPhys) + { + UInt64 remain, newLength; + + newLength = (maxPhys + 1 - curSeg.fIOVMAddr); + DEBG("trunc %qx, %qx-> %qx\n", curSeg.fIOVMAddr, curSeg.fLength, newLength); + remain = curSeg.fLength - newLength; + state->fIOVMAddr = newLength + curSeg.fIOVMAddr; + curSeg.fLength = newLength; + state->fLength = remain; + offset -= remain; + } + else if (gIOCopyMapper) + { + DEBG("sparse switch %qx, %qx ", curSeg.fIOVMAddr, curSeg.fLength); + // Cache this! 
+ for (UInt checkRemapIndex = 0; checkRemapIndex < internalState->fCopyPageCount; checkRemapIndex++) + { + if (trunc_page_64(curSeg.fIOVMAddr) == gIOCopyMapper->mapAddr( + ptoa_64(internalState->fCopyPageAlloc + checkRemapIndex))) + { + curSeg.fIOVMAddr = ptoa_64(internalState->fCopyPageAlloc + checkRemapIndex) + (curSeg.fIOVMAddr & PAGE_MASK); + break; + } + } + DEBG("-> %qx, %qx\n", curSeg.fIOVMAddr, curSeg.fLength); + } + } + } + + if (curSeg.fLength > fMaxSegmentSize) + { + UInt64 remain = curSeg.fLength - fMaxSegmentSize; + + state->fIOVMAddr = fMaxSegmentSize + curSeg.fIOVMAddr; + curSeg.fLength = fMaxSegmentSize; + + state->fLength = remain; + offset -= remain; + } + + if (internalState->fCursor + && (0 != (fAlignMask & curSeg.fIOVMAddr))) + { + curSeg.fIOVMAddr = 0; + ret = kIOReturnNotAligned; + break; + } + + if (offset >= memLength) + { + curSeg.fLength -= (offset - memLength); + offset = memLength; + state->fIOVMAddr = state->fLength = 0; // At end + break; + } + } + + if (state->fIOVMAddr) { + if ((segIndex + 1 == numSegments)) + break; + + ret = (*outSegFunc)(reference, this, curSeg, segmentsP, segIndex++); + curSeg.fIOVMAddr = 0; + if (kIOReturnSuccess != ret) + break; + } + } + + if (curSeg.fIOVMAddr) { + ret = (*outSegFunc)(reference, this, curSeg, segmentsP, segIndex++); + } + + if (kIOReturnSuccess == ret) + { + state->fOffset = offset; + *offsetP = offset - internalState->fPreparedOffset; + *numSegmentsP = segIndex; + } + return ret; +} + +IOReturn +IODMACommand::clientOutputSegment( + void *reference, IODMACommand *target, + Segment64 segment, void *vSegList, UInt32 outSegIndex) +{ + IOReturn ret = kIOReturnSuccess; + + if ((target->fNumAddressBits < 64) + && ((segment.fIOVMAddr + segment.fLength - 1) >> target->fNumAddressBits)) + { + DEBG("kIOReturnMessageTooLarge(fNumAddressBits) %qx, %qx\n", segment.fIOVMAddr, segment.fLength); + ret = kIOReturnMessageTooLarge; + } + + if (!(*target->fOutSeg)(target, segment, vSegList, outSegIndex)) + { + 
DEBG("kIOReturnMessageTooLarge(fOutSeg) %qx, %qx\n", segment.fIOVMAddr, segment.fLength); + ret = kIOReturnMessageTooLarge; + } + + return (ret); +} + +bool +IODMACommand::OutputHost32(IODMACommand *, + Segment64 segment, void *vSegList, UInt32 outSegIndex) +{ + Segment32 *base = (Segment32 *) vSegList; + base[outSegIndex].fIOVMAddr = (UInt32) segment.fIOVMAddr; + base[outSegIndex].fLength = (UInt32) segment.fLength; + return true; +} + +bool +IODMACommand::OutputBig32(IODMACommand *, + Segment64 segment, void *vSegList, UInt32 outSegIndex) +{ + const UInt offAddr = outSegIndex * sizeof(Segment32); + const UInt offLen = offAddr + sizeof(UInt32); + OSWriteBigInt32(vSegList, offAddr, (UInt32) segment.fIOVMAddr); + OSWriteBigInt32(vSegList, offLen, (UInt32) segment.fLength); + return true; +} + +bool +IODMACommand::OutputLittle32(IODMACommand *, + Segment64 segment, void *vSegList, UInt32 outSegIndex) +{ + const UInt offAddr = outSegIndex * sizeof(Segment32); + const UInt offLen = offAddr + sizeof(UInt32); + OSWriteLittleInt32(vSegList, offAddr, (UInt32) segment.fIOVMAddr); + OSWriteLittleInt32(vSegList, offLen, (UInt32) segment.fLength); + return true; +} + +bool +IODMACommand::OutputHost64(IODMACommand *, + Segment64 segment, void *vSegList, UInt32 outSegIndex) +{ + Segment64 *base = (Segment64 *) vSegList; + base[outSegIndex] = segment; + return true; +} + +bool +IODMACommand::OutputBig64(IODMACommand *, + Segment64 segment, void *vSegList, UInt32 outSegIndex) +{ + const UInt offAddr = outSegIndex * sizeof(Segment64); + const UInt offLen = offAddr + sizeof(UInt64); + OSWriteBigInt64(vSegList, offAddr, (UInt64) segment.fIOVMAddr); + OSWriteBigInt64(vSegList, offLen, (UInt64) segment.fLength); + return true; +} + +bool +IODMACommand::OutputLittle64(IODMACommand *, + Segment64 segment, void *vSegList, UInt32 outSegIndex) +{ + const UInt offAddr = outSegIndex * sizeof(Segment64); + const UInt offLen = offAddr + sizeof(UInt64); + OSWriteLittleInt64(vSegList, offAddr, 
(UInt64) segment.fIOVMAddr); + OSWriteLittleInt64(vSegList, offLen, (UInt64) segment.fLength); + return true; +} + + diff --git a/iokit/Kernel/IODeviceTreeSupport.cpp b/iokit/Kernel/IODeviceTreeSupport.cpp index a238b773c..5c3afed44 100644 --- a/iokit/Kernel/IODeviceTreeSupport.cpp +++ b/iokit/Kernel/IODeviceTreeSupport.cpp @@ -302,7 +302,11 @@ static void FreePhysicalMemory( vm_offset_t * range ) { vm_offset_t virt; +#if defined (__i386__) + virt = ml_boot_ptovirt( range[0] ); +#else virt = ml_static_ptovirt( range[0] ); +#endif if( virt) { ml_static_mfree( virt, range[1] ); } diff --git a/iokit/Kernel/IOHibernateIO.cpp b/iokit/Kernel/IOHibernateIO.cpp index 0d5fdbacd..4beb2606c 100644 --- a/iokit/Kernel/IOHibernateIO.cpp +++ b/iokit/Kernel/IOHibernateIO.cpp @@ -151,9 +151,7 @@ to restrict I/O ops. #include #include #include // (FWRITE, ...) -extern "C" { #include -} #include #include @@ -194,6 +192,9 @@ uint32_t gIOHibernateFreeTime = 0*1000; // max time to spend freeing pages (m static IODTNVRAM * gIOOptionsEntry; static IORegistryEntry * gIOChosenEntry; +#ifdef __i386__ +static const OSSymbol * gIOCreateEFIDevicePathSymbol; +#endif static IOPolledFileIOVars gFileVars; static IOHibernateVars gIOHibernateVars; @@ -337,21 +338,27 @@ hibernate_set_page_state(hibernate_page_list_t * page_list, hibernate_page_list_ } static vm_offset_t -hibernate_page_list_iterate(hibernate_page_list_t * list, - void ** iterator, vm_offset_t * ppnum) +hibernate_page_list_iterate(hibernate_page_list_t * list, vm_offset_t * pPage) { - uint32_t count, idx; - - idx = (uint32_t) *iterator; + uint32_t page = *pPage; + uint32_t count; + hibernate_bitmap_t * bitmap; - if (!idx) - idx = hibernate_page_list_count(list, TRUE, idx); + while ((bitmap = hibernate_page_bitmap_pin(list, &page))) + { + count = hibernate_page_bitmap_count(bitmap, TRUE, page); + if (!count) + break; + page += count; + if (page <= bitmap->last_page) + break; + } - *ppnum = idx; - count = hibernate_page_list_count(list, 
FALSE, idx); - idx += count; - idx += hibernate_page_list_count(list, TRUE, idx); - *iterator = (void *) idx; + *pPage = page; + if (bitmap) + count = hibernate_page_bitmap_count(bitmap, FALSE, page); + else + count = 0; return (count); } @@ -621,7 +628,15 @@ IOPolledFileOpen( const char * filename, IOBufferMemoryDescriptor * ioBuffer, do { IOPolledInterface * poller; - if ((poller = OSDynamicCast(IOPolledInterface, next->getProperty(kIOPolledInterfaceSupportKey)))) + OSObject * obj; + + obj = next->getProperty(kIOPolledInterfaceSupportKey); + if (kOSBooleanFalse == obj) + { + vars->pollers->flushCollection(); + break; + } + else if ((poller = OSDynamicCast(IOPolledInterface, obj))) vars->pollers->setObject(poller); if ((num = OSDynamicCast(OSNumber, next->getProperty(kIOMediaPreferredBlockSizeKey)))) vars->blockSize = num->unsigned32BitValue(); @@ -647,21 +662,42 @@ IOPolledFileOpen( const char * filename, IOBufferMemoryDescriptor * ioBuffer, *fileExtents = extentsData; // make imagePath - char str1[256]; - char str2[24]; - int len = sizeof(str1); - if ((extentsData->getLength() >= sizeof(IOPolledFileExtent)) - && part->getPath(str1, &len, gIODTPlane)) + if ((extentsData->getLength() >= sizeof(IOPolledFileExtent))) { - // (strip the plane name) - char * tail = strchr(str1, ':'); - if (!tail) - tail = str1 - 1; - data = OSData::withBytes(tail + 1, strlen(tail + 1)); - sprintf(str2, ",%qx", vars->extentMap[0]); - data->appendBytes(str2, strlen(str2)); + char str2[24]; + +#if __i386__ + if (!gIOCreateEFIDevicePathSymbol) + gIOCreateEFIDevicePathSymbol = OSSymbol::withCString("CreateEFIDevicePath"); + + sprintf(str2, "%qx", vars->extentMap[0]); + + err = IOService::getPlatform()->callPlatformFunction( + gIOCreateEFIDevicePathSymbol, false, + (void *) part, (void *) str2, (void *) true, + (void *) &data); +#else + char str1[256]; + int len = sizeof(str1); + + if (!part->getPath(str1, &len, gIODTPlane)) + err = kIOReturnNotFound; + else + { + sprintf(str2, ",%qx", 
vars->extentMap[0]); + // (strip the plane name) + char * tail = strchr(str1, ':'); + if (!tail) + tail = str1 - 1; + data = OSData::withBytes(tail + 1, strlen(tail + 1)); + data->appendBytes(str2, strlen(str2)); + } +#endif + if (kIOReturnSuccess == err) *imagePath = data; + else + HIBLOG("error 0x%x getting path\n", err); } } while (false); @@ -981,8 +1017,10 @@ IOHibernateSystemSleep(void) do { - vars->srcBuffer = IOBufferMemoryDescriptor::withOptions(0, 4 * page_size, page_size); - vars->ioBuffer = IOBufferMemoryDescriptor::withOptions(0, 2 * kDefaultIOSize, page_size); + vars->srcBuffer = IOBufferMemoryDescriptor::withOptions(kIODirectionOutIn, + 4 * page_size, page_size); + vars->ioBuffer = IOBufferMemoryDescriptor::withOptions(kIODirectionOutIn, + 2 * kDefaultIOSize, page_size); if (!vars->srcBuffer || !vars->ioBuffer) { @@ -1048,8 +1086,6 @@ IOHibernateSystemSleep(void) if (gIOOptionsEntry) { const OSSymbol * sym; - size_t len; - char valueString[16]; sym = OSSymbol::withCStringNoCopy(kIOHibernateBootImageKey); if (sym) @@ -1059,6 +1095,10 @@ IOHibernateSystemSleep(void) } data->release(); +#ifdef __ppc__ + size_t len; + char valueString[16]; + vars->saveBootDevice = gIOOptionsEntry->copyProperty(kIOSelectedBootDeviceKey); if (gIOChosenEntry) { @@ -1074,7 +1114,6 @@ IOHibernateSystemSleep(void) if (str2) str2->release(); } - data = OSDynamicCast(OSData, gIOChosenEntry->getProperty(kIOHibernateMemorySignatureKey)); if (data) { @@ -1094,7 +1133,57 @@ IOHibernateSystemSleep(void) if (data) gIOHibernateCurrentHeader->machineSignature = *((UInt32 *)data->getBytesNoCopy()); } - +#endif /* __ppc__ */ +#ifdef __i386__ + struct AppleRTCHibernateVars + { + uint8_t signature[4]; + uint32_t revision; + uint8_t booterSignature[20]; + uint8_t wiredCryptKey[16]; + }; + AppleRTCHibernateVars rtcVars; + + rtcVars.signature[0] = 'A'; + rtcVars.signature[1] = 'A'; + rtcVars.signature[2] = 'P'; + rtcVars.signature[3] = 'L'; + rtcVars.revision = 1; + 
bcopy(&vars->wiredCryptKey[0], &rtcVars.wiredCryptKey[0], sizeof(rtcVars.wiredCryptKey)); + if (gIOHibernateBootSignature[0]) + { + char c; + uint8_t value = 0; + for (uint32_t i = 0; + (c = gIOHibernateBootSignature[i]) && (i < (sizeof(rtcVars.booterSignature) << 1)); + i++) + { + if (c >= 'a') + c -= 'a' - 10; + else if (c >= 'A') + c -= 'A' - 10; + else if (c >= '0') + c -= '0'; + else + continue; + value = (value << 4) | c; + if (i & 1) + rtcVars.booterSignature[i >> 1] = value; + } + } + data = OSData::withBytes(&rtcVars, sizeof(rtcVars)); + if (data) + { + IOService::getPMRootDomain()->setProperty(kIOHibernateRTCVariablesKey, data); + data->release(); + } + if (gIOChosenEntry) + { + data = OSDynamicCast(OSData, gIOChosenEntry->getProperty(kIOHibernateMachineSignatureKey)); + if (data) + gIOHibernateCurrentHeader->machineSignature = *((UInt32 *)data->getBytesNoCopy()); + } +#else /* !__i386__ */ if (kIOHibernateModeEncrypt & gIOHibernateMode) { data = OSData::withBytes(&vars->wiredCryptKey[0], sizeof(vars->wiredCryptKey)); @@ -1112,7 +1201,7 @@ IOHibernateSystemSleep(void) if (sym && data) { char c; - uint8_t value; + uint8_t value = 0; for (uint32_t i = 0; (c = gIOHibernateBootSignature[i]); i++) { if (c >= 'a') @@ -1135,7 +1224,6 @@ IOHibernateSystemSleep(void) data->release(); } } - if (!vars->haveFastBoot) { // set boot volume to zero @@ -1149,6 +1237,7 @@ IOHibernateSystemSleep(void) &newVolume, sizeof(newVolume)); } } +#endif /* !__i386__ */ } // -- @@ -1221,6 +1310,7 @@ IOHibernateSystemWake(void) // invalidate nvram properties - (gIOOptionsEntry != 0) => nvram was touched +#ifdef __ppc__ OSData * data = OSData::withCapacity(4); if (gIOOptionsEntry && data) { @@ -1274,6 +1364,11 @@ IOHibernateSystemWake(void) // just sync the variables in case a later panic syncs nvram (it won't sync variables) gIOOptionsEntry->syncOFVariables(); } +#endif + +#ifdef __i386__ + IOService::getPMRootDomain()->removeProperty(kIOHibernateRTCVariablesKey); +#endif if 
(vars->srcBuffer) vars->srcBuffer->release(); @@ -1294,6 +1389,13 @@ IOHibernateSystemPostWake(void) { if (gIOHibernateFileRef) { + // invalidate the image file + gIOHibernateCurrentHeader->signature = kIOHibernateHeaderInvalidSignature; + int err = kern_write_file(gIOHibernateFileRef, 0, + (caddr_t) gIOHibernateCurrentHeader, sizeof(IOHibernateImageHeader)); + if (KERN_SUCCESS != err) + HIBLOG("kern_write_file(%d)\n", err); + kern_close_file_for_direct_io(gIOHibernateFileRef); gIOHibernateFileRef = 0; } @@ -1346,6 +1448,8 @@ hibernate_setup_for_wake(void) /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ +#define C_ASSERT(e) typedef char __C_ASSERT__[(e) ? 1 : -1] + extern "C" boolean_t hibernate_write_image(void) { @@ -1353,6 +1457,8 @@ hibernate_write_image(void) IOHibernateVars * vars = &gIOHibernateVars; IOPolledFileExtent * fileExtents; + C_ASSERT(sizeof(IOHibernateImageHeader) == 512); + uint32_t pageCount, pagesDone; IOReturn err; vm_offset_t ppnum; @@ -1585,21 +1691,15 @@ hibernate_write_image(void) // mark more areas for no save, but these are not available // for trashing during restore + + hibernate_page_list_set_volatile(vars->page_list, vars->page_list_wired, &pageCount); -#if !__i386__ page = atop_32(sectHIBB); count = atop_32(round_page(sectHIBB + sectSizeHIB)) - page; -#else - // XXX - page = atop_32(sectHIBB & 0x3FFFFFFF); - count = atop_32(round_page((sectHIBB + sectSizeHIB) & 0x3FFFFFFF)) - page; -#endif hibernate_set_page_state(vars->page_list, vars->page_list_wired, page, count, kIOHibernatePageStateFree); pageCount -= count; - - if (vars->previewBuffer) for (count = 0; (phys64 = vars->previewBuffer->getPhysicalSegment64(count, &segLen)); @@ -1613,15 +1713,15 @@ hibernate_write_image(void) src = (uint8_t *) vars->srcBuffer->getBytesNoCopy(); - void * iter = 0; - pagesDone = 0; + ppnum = 0; + pagesDone = 0; HIBLOG("writing %d pages\n", pageCount); do { count = hibernate_page_list_iterate(pageType ? 
vars->page_list : vars->page_list_wired, - &iter, &ppnum); + &ppnum); // kprintf("[%d](%x : %x)\n", pageType, ppnum, count); iterDone = !count; @@ -1719,7 +1819,7 @@ hibernate_write_image(void) iterDone = false; pageType = 1; - iter = 0; + ppnum = 0; image1Size = vars->fileVars->position; if (cryptvars) { @@ -1890,10 +1990,6 @@ hibernate_machine_init(void) if (!vars->fileVars || !vars->fileVars->pollers || !vars->fileExtents) return; - if ((kIOHibernateModeDiscardCleanActive | kIOHibernateModeDiscardCleanInactive) & gIOHibernateMode) - hibernate_page_list_discard(vars->page_list); - - sum = gIOHibernateCurrentHeader->actualImage1Sum; pagesDone = gIOHibernateCurrentHeader->actualUncompressedPages; @@ -1915,7 +2011,14 @@ hibernate_machine_init(void) gIOHibernateGraphicsInfo->physicalAddress, gIOHibernateGraphicsInfo->depth, gIOHibernateGraphicsInfo->width, gIOHibernateGraphicsInfo->height); - if (vars->videoMapping && gIOHibernateGraphicsInfo->physicalAddress) + if ((kIOHibernateModeDiscardCleanActive | kIOHibernateModeDiscardCleanInactive) & gIOHibernateMode) + hibernate_page_list_discard(vars->page_list); + + boot_args *args = (boot_args *) PE_state.bootArgs; + + if (vars->videoMapping + && gIOHibernateGraphicsInfo->physicalAddress + && (args->Video.v_baseAddr == gIOHibernateGraphicsInfo->physicalAddress)) { vars->videoMapSize = round_page(gIOHibernateGraphicsInfo->height * gIOHibernateGraphicsInfo->rowBytes); @@ -1924,7 +2027,19 @@ hibernate_machine_init(void) vars->videoMapSize, kIOMapInhibitCache ); } - uint8_t * src = (uint8_t *) vars->srcBuffer->getBytesNoCopy();; + uint8_t * src = (uint8_t *) vars->srcBuffer->getBytesNoCopy(); + + if (gIOHibernateWakeMapSize) + { + err = IOMemoryDescriptorWriteFromPhysical(vars->srcBuffer, 0, ptoa_64(gIOHibernateWakeMap), + gIOHibernateWakeMapSize); + if (kIOReturnSuccess == err) + hibernate_newruntime_map(src, gIOHibernateWakeMapSize, + gIOHibernateCurrentHeader->systemTableOffset); + gIOHibernateWakeMap = 0; + 
gIOHibernateWakeMapSize = 0; + } + uint32_t decoOffset; clock_get_uptime(&allTime); diff --git a/iokit/Kernel/IOHibernateInternal.h b/iokit/Kernel/IOHibernateInternal.h index 59783732f..04b5fd3eb 100644 --- a/iokit/Kernel/IOHibernateInternal.h +++ b/iokit/Kernel/IOHibernateInternal.h @@ -91,3 +91,6 @@ extern int sectSizeHIB; extern vm_offset_t sectDATAB; extern int sectSizeDATA; +extern vm_offset_t gIOHibernateWakeMap; // ppnum +extern vm_size_t gIOHibernateWakeMapSize; + diff --git a/iokit/Kernel/IOHibernateRestoreKernel.c b/iokit/Kernel/IOHibernateRestoreKernel.c index 3c6f16866..a61421190 100644 --- a/iokit/Kernel/IOHibernateRestoreKernel.c +++ b/iokit/Kernel/IOHibernateRestoreKernel.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include @@ -48,11 +49,13 @@ hibernate_graphics_t * gIOHibernateGraphicsInfo = &_hibernateGraphics; static hibernate_cryptwakevars_t _cryptWakeVars; hibernate_cryptwakevars_t * gIOHibernateCryptWakeVars = &_cryptWakeVars; +vm_offset_t gIOHibernateWakeMap; // ppnum +vm_size_t gIOHibernateWakeMapSize; + #if __i386__ extern void acpi_wake_prot_entry(void); #endif - /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ #define BASE 65521L /* largest prime smaller than 65536 */ @@ -92,29 +95,8 @@ hibernate_sum(uint8_t *buf, int32_t len) /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ -#if __ppc__ -static __inline__ unsigned int cntlzw(unsigned int num) -{ - unsigned int result; - __asm__ volatile("cntlzw %0, %1" : "=r" (result) : "r" (num)); - return result; -} -#elif __i386__ -static __inline__ unsigned int cntlzw(unsigned int num) -{ - unsigned int result; - __asm__ volatile( "bsrl %1, %0\n\t" - "cmovel %2, %0" - : "=r" (result) - : "rm" (num), "r" (63)); - return 31 ^ result; -} -#else -#error arch -#endif - -void -hibernate_page_bitset(hibernate_page_list_t * list, boolean_t set, uint32_t page) +static hibernate_bitmap_t * +hibernate_page_bitmap(hibernate_page_list_t * 
list, uint32_t page) { uint32_t bank; hibernate_bitmap_t * bitmap = &list->bank_bitmap[0]; @@ -122,117 +104,129 @@ hibernate_page_bitset(hibernate_page_list_t * list, boolean_t set, uint32_t page for (bank = 0; bank < list->bank_count; bank++) { if ((page >= bitmap->first_page) && (page <= bitmap->last_page)) - { - page -= bitmap->first_page; - if (set) - bitmap->bitmap[page >> 5] |= (0x80000000 >> (page & 31)); - //setbit(page - bitmap->first_page, (int *) &bitmap->bitmap[0]); - else - bitmap->bitmap[page >> 5] &= ~(0x80000000 >> (page & 31)); - //clrbit(page - bitmap->first_page, (int *) &bitmap->bitmap[0]); break; - } bitmap = (hibernate_bitmap_t *) &bitmap->bitmap[bitmap->bitmapwords]; } + if (bank == list->bank_count) + bitmap = 0; + + return (bitmap); } -boolean_t -hibernate_page_bittst(hibernate_page_list_t * list, uint32_t page) +hibernate_bitmap_t * +hibernate_page_bitmap_pin(hibernate_page_list_t * list, uint32_t * pPage) { - boolean_t result = TRUE; - uint32_t bank; + uint32_t bank, page = *pPage; hibernate_bitmap_t * bitmap = &list->bank_bitmap[0]; for (bank = 0; bank < list->bank_count; bank++) { - if ((page >= bitmap->first_page) && (page <= bitmap->last_page)) + if (page <= bitmap->first_page) { - page -= bitmap->first_page; - result = (0 != (bitmap->bitmap[page >> 5] & (0x80000000 >> (page & 31)))); + *pPage = bitmap->first_page; break; } + if (page <= bitmap->last_page) + break; bitmap = (hibernate_bitmap_t *) &bitmap->bitmap[bitmap->bitmapwords]; } + if (bank == list->bank_count) + bitmap = 0; + + return (bitmap); +} + +void +hibernate_page_bitset(hibernate_page_list_t * list, boolean_t set, uint32_t page) +{ + hibernate_bitmap_t * bitmap; + + bitmap = hibernate_page_bitmap(list, page); + if (bitmap) + { + page -= bitmap->first_page; + if (set) + bitmap->bitmap[page >> 5] |= (0x80000000 >> (page & 31)); + //setbit(page - bitmap->first_page, (int *) &bitmap->bitmap[0]); + else + bitmap->bitmap[page >> 5] &= ~(0x80000000 >> (page & 31)); + 
//clrbit(page - bitmap->first_page, (int *) &bitmap->bitmap[0]); + } +} + +boolean_t +hibernate_page_bittst(hibernate_page_list_t * list, uint32_t page) +{ + boolean_t result = TRUE; + hibernate_bitmap_t * bitmap; + + bitmap = hibernate_page_bitmap(list, page); + if (bitmap) + { + page -= bitmap->first_page; + result = (0 != (bitmap->bitmap[page >> 5] & (0x80000000 >> (page & 31)))); + } return (result); } -// count bits clear or set (set == TRUE) starting at index page. +// count bits clear or set (set == TRUE) starting at page. uint32_t -hibernate_page_list_count(hibernate_page_list_t * list, uint32_t set, uint32_t page) +hibernate_page_bitmap_count(hibernate_bitmap_t * bitmap, uint32_t set, uint32_t page) { - uint32_t bank, count; - hibernate_bitmap_t * bitmap; + uint32_t index, bit, bits; + uint32_t count; - bitmap = &list->bank_bitmap[0]; - count = 0; + count = 0; - for (bank = 0; bank < list->bank_count; bank++) + index = (page - bitmap->first_page) >> 5; + bit = (page - bitmap->first_page) & 31; + + bits = bitmap->bitmap[index]; + if (set) + bits = ~bits; + bits = (bits << bit); + if (bits) + count += __builtin_clz(bits); + else { - // bits between banks are "set" - if (set && (page < bitmap->first_page)) + count += 32 - bit; + while (++index < bitmap->bitmapwords) { - count += bitmap->first_page - page; - page = bitmap->first_page; - } - if ((page >= bitmap->first_page) && (page <= bitmap->last_page)) - { - uint32_t index, bit, bits; - - index = (page - bitmap->first_page) >> 5; - bit = (page - bitmap->first_page) & 31; - - while (TRUE) + bits = bitmap->bitmap[index]; + if (set) + bits = ~bits; + if (bits) { - bits = bitmap->bitmap[index]; - if (set) - bits = ~bits; - bits = (bits << bit); - count += cntlzw(bits); - if (bits) - break; - count -= bit; - - while (++index < bitmap->bitmapwords) - { - bits = bitmap->bitmap[index]; - if (set) - bits = ~bits; - count += cntlzw(bits); - if (bits) - break; - } - if (bits) - break; - if (!set) - break; - // bits 
between banks are "set" - bank++; - if (bank >= list->bank_count) - break; - count -= (bitmap->last_page + 1); - bitmap = (hibernate_bitmap_t *) &bitmap->bitmap[bitmap->bitmapwords]; - count += bitmap->first_page; - index = 0; - bit = 0; + count += __builtin_clz(bits); + break; } - break; + count += 32; } - bitmap = (hibernate_bitmap_t *) &bitmap->bitmap[bitmap->bitmapwords]; } return (count); } - -static uint32_t -hibernate_page_list_grab(hibernate_page_list_t * map, uint32_t * _nextFree) +static vm_offset_t +hibernate_page_list_grab(hibernate_page_list_t * list, uint32_t * pNextFree) { - uint32_t nextFree = *_nextFree; + uint32_t nextFree = *pNextFree; + uint32_t nextFreeInBank; + hibernate_bitmap_t * bitmap; - if (!nextFree) - nextFree = hibernate_page_list_count(map, 0, 0); + nextFreeInBank = nextFree + 1; + while ((bitmap = hibernate_page_bitmap_pin(list, &nextFreeInBank))) + { + nextFreeInBank += hibernate_page_bitmap_count(bitmap, FALSE, nextFreeInBank); + if (nextFreeInBank <= bitmap->last_page) + { + *pNextFree = nextFreeInBank; + break; + } + } - *_nextFree = nextFree + 1 + hibernate_page_list_count(map, 0, nextFree + 1); + if (!bitmap) + IOPanic(__FUNCTION__); return (nextFree); } @@ -245,14 +239,8 @@ store_one_page(uint32_t procFlags, uint32_t * src, uint32_t compressedSize, uint32_t sum; dst = ptoa_64(ppnum); -#if __ppc__ if (ppnum < 0x00100000) buffer = (uint32_t *) (uint32_t) dst; -#elif __i386__ - if (ppnum < atop_32(0xC0000000)) { - buffer = (uint32_t *) (uint32_t) dst; - } -#endif if (compressedSize != PAGE_SIZE) { @@ -284,9 +272,11 @@ bcopy_internal(const void *src, void *dst, uint32_t len) } } +#define C_ASSERT(e) typedef char __C_ASSERT__[(e) ? 
1 : -1] + long hibernate_kernel_entrypoint(IOHibernateImageHeader * header, - void * p2, void * p3, __unused void * p4) + void * p2, void * p3, void * p4) { typedef void (*ResetProc)(void); uint32_t idx; @@ -309,11 +299,18 @@ hibernate_kernel_entrypoint(IOHibernateImageHeader * header, uint32_t lastMapPage; uint32_t lastPageIndexPage; + C_ASSERT(sizeof(IOHibernateImageHeader) == 512); bcopy_internal(header, gIOHibernateCurrentHeader, sizeof(IOHibernateImageHeader)); + if (!p2) + { + count = header->graphicsInfoOffset; + if (count) + p2 = (void *)(((uintptr_t) header) - count); + } if (p2) bcopy_internal(p2, gIOHibernateGraphicsInfo, @@ -321,6 +318,12 @@ hibernate_kernel_entrypoint(IOHibernateImageHeader * header, else gIOHibernateGraphicsInfo->physicalAddress = gIOHibernateGraphicsInfo->depth = 0; + if (!p3) + { + count = header->cryptVarsOffset; + if (count) + p3 = (void *)(((uintptr_t) header) - count); + } if (p3) bcopy_internal(p3, gIOHibernateCryptWakeVars, @@ -357,8 +360,19 @@ hibernate_kernel_entrypoint(IOHibernateImageHeader * header, } nextFree = 0; + hibernate_page_list_grab(map, &nextFree); buffer = (uint32_t *) ptoa_32(hibernate_page_list_grab(map, &nextFree)); + if (header->memoryMapSize && (count = header->memoryMapOffset)) + { + p4 = (void *)(((uintptr_t) header) - count); + gIOHibernateWakeMap = hibernate_page_list_grab(map, &nextFree); + gIOHibernateWakeMapSize = header->memoryMapSize; + bcopy_internal(p4, (void *) ptoa_32(gIOHibernateWakeMap), gIOHibernateWakeMapSize); + } + else + gIOHibernateWakeMapSize = 0; + sum = gIOHibernateCurrentHeader->actualRestore1Sum; gIOHibernateCurrentHeader->diag[0] = (uint32_t) header; gIOHibernateCurrentHeader->diag[1] = sum; @@ -407,6 +421,8 @@ hibernate_kernel_entrypoint(IOHibernateImageHeader * header, compressedSize = kIOHibernateTagLength & tag; } +// SINT(ppnum); + conflicts = (((ppnum >= atop_32(map)) && (ppnum <= lastMapPage)) || ((ppnum >= atop_32(src)) && (ppnum <= lastImagePage))); @@ -492,7 +508,8 @@ 
hibernate_kernel_entrypoint(IOHibernateImageHeader * header, #elif __i386__ ResetProc proc; proc = (ResetProc) acpi_wake_prot_entry; - + // flush caches + __asm__("wbinvd"); proc(); #endif diff --git a/iokit/Kernel/IOInterruptController.cpp b/iokit/Kernel/IOInterruptController.cpp index 29c5fd14a..2c8304e33 100644 --- a/iokit/Kernel/IOInterruptController.cpp +++ b/iokit/Kernel/IOInterruptController.cpp @@ -666,7 +666,8 @@ IOReturn IOSharedInterruptController::disableInterrupt(IOService *nub, IOInterruptAction IOSharedInterruptController::getInterruptHandlerAddress(void) { - return (IOInterruptAction)&IOSharedInterruptController::handleInterrupt; + return OSMemberFunctionCast(IOInterruptAction, + this, &IOSharedInterruptController::handleInterrupt); } IOReturn IOSharedInterruptController::handleInterrupt(void * /*refCon*/, diff --git a/iokit/Kernel/IOKitKernelInternal.h b/iokit/Kernel/IOKitKernelInternal.h index 9322aa0b5..373783f4f 100644 --- a/iokit/Kernel/IOKitKernelInternal.h +++ b/iokit/Kernel/IOKitKernelInternal.h @@ -40,17 +40,52 @@ kern_return_t IOIteratePageableMaps(vm_size_t size, vm_map_t IOPageableMapForAddress( vm_address_t address ); SInt32 OSKernelStackRemaining( void ); +mach_vm_address_t +IOKernelAllocateContiguous(mach_vm_size_t size, mach_vm_size_t alignment); +void +IOKernelFreeContiguous(mach_vm_address_t address, mach_vm_size_t size); + extern vm_size_t debug_iomallocpageable_size; // osfmk/device/iokit_rpc.c // LP64todo - these need to expand -extern kern_return_t IOMapPages( vm_map_t map, vm_offset_t va, vm_offset_t pa, - vm_size_t length, unsigned int mapFlags); -extern kern_return_t IOUnmapPages(vm_map_t map, vm_offset_t va, vm_size_t length); +extern kern_return_t IOMapPages(vm_map_t map, mach_vm_address_t va, mach_vm_address_t pa, + mach_vm_size_t length, unsigned int options); +extern kern_return_t IOUnmapPages(vm_map_t map, mach_vm_address_t va, mach_vm_size_t length); + +extern kern_return_t IOProtectCacheMode(vm_map_t map, 
mach_vm_address_t va, + mach_vm_size_t length, unsigned int mapFlags); + +extern ppnum_t IOGetLastPageNumber(void); + +extern ppnum_t gIOLastPage; /* Physical to physical copy (ints must be disabled) */ extern void bcopy_phys(addr64_t from, addr64_t to, int size); __END_DECLS +// Used for dedicated communications for IODMACommand +enum { + kIOMDWalkSegments = 0x00000001, + kIOMDFirstSegment = 0x00000002 | kIOMDWalkSegments, + kIOMDGetCharacteristics = 0x00000004, + kIOMDLastDMACommandOperation +}; +struct IOMDDMACharacteristics { + UInt64 fLength; + UInt32 fSGCount; + UInt32 fPages; + UInt32 fPageAlign; + ppnum_t fHighestPage; + IODirection fDirection; + UInt8 fIsMapped, fIsPrepared; +}; +struct IOMDDMAWalkSegmentArgs { + UInt64 fOffset; // Input/Output offset + UInt64 fIOVMAddr, fLength; // Output variables + UInt8 fMapped; // Input Variable, Require mapped IOVMA +}; +typedef UInt8 IOMDDMAWalkSegmentState[128]; + #endif /* ! _IOKIT_KERNELINTERNAL_H */ diff --git a/iokit/Kernel/IOLib.c b/iokit/Kernel/IOLib.cpp similarity index 81% rename from iokit/Kernel/IOLib.c rename to iokit/Kernel/IOLib.cpp index 0a75d9af3..5f2b9f77c 100644 --- a/iokit/Kernel/IOLib.c +++ b/iokit/Kernel/IOLib.cpp @@ -38,14 +38,27 @@ #include #include #include +#include #include #include "IOKitKernelInternal.h" +extern "C" +{ + + mach_timespec_t IOZeroTvalspec = { 0, 0 }; extern ppnum_t pmap_find_phys(pmap_t pmap, addr64_t va); +extern kern_return_t kmem_suballoc( + vm_map_t parent, + vm_offset_t *addr, + vm_size_t size, + boolean_t pageable, + boolean_t anywhere, + vm_map_t *new_map); + /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ lck_grp_t *IOLockGroup; @@ -138,9 +151,9 @@ IOThread IOCreateThread(IOThreadFunc fcn, void *arg) } -volatile void IOExitThread(void) +void IOExitThread(void) { - (void) thread_terminate(current_thread()); + (void) thread_terminate(current_thread()); } /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ @@ -264,130 
+277,168 @@ void IOFreeAligned(void * address, vm_size_t size) /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ -struct _IOMallocContiguousEntry +void +IOKernelFreeContiguous(mach_vm_address_t address, mach_vm_size_t size) { - void * virtual; - ppnum_t ioBase; - queue_chain_t link; -}; -typedef struct _IOMallocContiguousEntry _IOMallocContiguousEntry; + mach_vm_address_t allocationAddress; + mach_vm_size_t adjustedSize; -void * IOMallocContiguous(vm_size_t size, vm_size_t alignment, - IOPhysicalAddress * physicalAddress) + if (!address) + return; + + assert(size); + + adjustedSize = (2 * size) + sizeof(mach_vm_size_t) + sizeof(mach_vm_address_t); + if (adjustedSize >= page_size) { + + kmem_free( kernel_map, (vm_address_t) address, size); + + } else { + + adjustedSize = *((mach_vm_size_t *) + (address - sizeof(mach_vm_address_t) - sizeof(mach_vm_size_t))); + allocationAddress = *((mach_vm_address_t *) + (address - sizeof(mach_vm_address_t) )); + kfree((void *)allocationAddress, adjustedSize); + } + +#if IOALLOCDEBUG + debug_iomalloc_size -= size; +#endif +} + +mach_vm_address_t +IOKernelAllocateContiguous(mach_vm_size_t size, mach_vm_size_t alignment) { kern_return_t kr; - vm_address_t address; - vm_address_t allocationAddress; - vm_size_t adjustedSize; - vm_offset_t alignMask; - ppnum_t pagenum; + mach_vm_address_t address; + mach_vm_address_t allocationAddress; + mach_vm_size_t adjustedSize; + mach_vm_address_t alignMask; if (size == 0) - return 0; + return (0); if (alignment == 0) alignment = 1; alignMask = alignment - 1; - adjustedSize = (2 * size) + sizeof(vm_size_t) + sizeof(vm_address_t); + adjustedSize = (2 * size) + sizeof(mach_vm_size_t) + sizeof(mach_vm_address_t); if (adjustedSize >= page_size) { + vm_offset_t virt; adjustedSize = size; if (adjustedSize > page_size) { - kr = kmem_alloc_contig(kernel_map, &address, size, + kr = kmem_alloc_contig(kernel_map, &virt, size, alignMask, 0); } else { - kr = 
kernel_memory_allocate(kernel_map, &address, + kr = kernel_memory_allocate(kernel_map, &virt, size, alignMask, 0); } - if (KERN_SUCCESS != kr) + if (KERN_SUCCESS == kr) + address = virt; + else address = 0; } else { adjustedSize += alignMask; - allocationAddress = (vm_address_t) kalloc(adjustedSize); + allocationAddress = (mach_vm_address_t) kalloc(adjustedSize); if (allocationAddress) { address = (allocationAddress + alignMask - + (sizeof(vm_size_t) + sizeof(vm_address_t))) + + (sizeof(mach_vm_size_t) + sizeof(mach_vm_address_t))) & (~alignMask); if (atop_32(address) != atop_32(address + size - 1)) address = round_page_32(address); - *((vm_size_t *)(address - sizeof(vm_size_t) - - sizeof(vm_address_t))) = adjustedSize; - *((vm_address_t *)(address - sizeof(vm_address_t))) + *((mach_vm_size_t *)(address - sizeof(mach_vm_size_t) + - sizeof(mach_vm_address_t))) = adjustedSize; + *((mach_vm_address_t *)(address - sizeof(mach_vm_address_t))) = allocationAddress; } else address = 0; } +#if IOALLOCDEBUG + if (address) + debug_iomalloc_size += size; +#endif + + return (address); +} + +/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ + +struct _IOMallocContiguousEntry +{ + mach_vm_address_t virtualAddr; + IOBufferMemoryDescriptor * md; + queue_chain_t link; +}; +typedef struct _IOMallocContiguousEntry _IOMallocContiguousEntry; + +void * IOMallocContiguous(vm_size_t size, vm_size_t alignment, + IOPhysicalAddress * physicalAddress) +{ + mach_vm_address_t address = 0; + + if (size == 0) + return 0; + if (alignment == 0) + alignment = 1; + /* Do we want a physical address? 
*/ - if (address && physicalAddress) + if (!physicalAddress) { - do + address = IOKernelAllocateContiguous(size, alignment); + } + else do + { + IOBufferMemoryDescriptor * bmd; + mach_vm_address_t physicalMask; + vm_offset_t alignMask; + + alignMask = alignment - 1; + physicalMask = 0xFFFFFFFF ^ (alignMask & PAGE_MASK); + bmd = IOBufferMemoryDescriptor::inTaskWithPhysicalMask( + kernel_task, kIOMemoryPhysicallyContiguous, size, physicalMask); + if (!bmd) + break; + + _IOMallocContiguousEntry * + entry = IONew(_IOMallocContiguousEntry, 1); + if (!entry) { - /* Get the physical page */ - pagenum = pmap_find_phys(kernel_pmap, (addr64_t) address); - if(pagenum) - { - IOByteCount offset; - ppnum_t base; - - base = IOMapperIOVMAlloc((size + PAGE_MASK) >> PAGE_SHIFT); - if (base) - { - _IOMallocContiguousEntry * - entry = IONew(_IOMallocContiguousEntry, 1); - if (!entry) - { - IOFreeContiguous((void *) address, size); - address = 0; - break; - } - entry->virtual = (void *) address; - entry->ioBase = base; - lck_mtx_lock(gIOMallocContiguousEntriesLock); - queue_enter( &gIOMallocContiguousEntries, entry, - _IOMallocContiguousEntry *, link ); - lck_mtx_unlock(gIOMallocContiguousEntriesLock); - - *physicalAddress = (IOPhysicalAddress)((base << PAGE_SHIFT) | (address & PAGE_MASK)); - for (offset = 0; offset < ((size + PAGE_MASK) >> PAGE_SHIFT); offset++, pagenum++) - IOMapperInsertPage( base, offset, pagenum ); - } - else - *physicalAddress = (IOPhysicalAddress)((pagenum << PAGE_SHIFT) | (address & PAGE_MASK)); - } - else - /* Did not find, return 0 */ - *physicalAddress = (IOPhysicalAddress) 0; + bmd->release(); + break; } - while (false); + entry->virtualAddr = (mach_vm_address_t) bmd->getBytesNoCopy(); + entry->md = bmd; + lck_mtx_lock(gIOMallocContiguousEntriesLock); + queue_enter( &gIOMallocContiguousEntries, entry, + _IOMallocContiguousEntry *, link ); + lck_mtx_unlock(gIOMallocContiguousEntriesLock); + + address = (mach_vm_address_t) entry->virtualAddr; + 
*physicalAddress = bmd->getPhysicalAddress(); } - - assert(0 == (address & alignMask)); - -#if IOALLOCDEBUG - if( address) - debug_iomalloc_size += size; -#endif + while (false); return (void *) address; } -void IOFreeContiguous(void * address, vm_size_t size) +void IOFreeContiguous(void * _address, vm_size_t size) { - vm_address_t allocationAddress; - vm_size_t adjustedSize; _IOMallocContiguousEntry * entry; - ppnum_t base = 0; + IOMemoryDescriptor * md = NULL; + + mach_vm_address_t address = (mach_vm_address_t) _address; if( !address) return; @@ -398,8 +449,8 @@ void IOFreeContiguous(void * address, vm_size_t size) queue_iterate( &gIOMallocContiguousEntries, entry, _IOMallocContiguousEntry *, link ) { - if( entry->virtual == address ) { - base = entry->ioBase; + if( entry->virtualAddr == address ) { + md = entry->md; queue_remove( &gIOMallocContiguousEntries, entry, _IOMallocContiguousEntry *, link ); break; @@ -407,29 +458,15 @@ void IOFreeContiguous(void * address, vm_size_t size) } lck_mtx_unlock(gIOMallocContiguousEntriesLock); - if (base) + if (md) { - IOMapperIOVMFree(base, (size + PAGE_MASK) >> PAGE_SHIFT); + md->release(); IODelete(entry, _IOMallocContiguousEntry, 1); } - - adjustedSize = (2 * size) + sizeof(vm_size_t) + sizeof(vm_address_t); - if (adjustedSize >= page_size) { - - kmem_free( kernel_map, (vm_address_t) address, size); - - } else { - adjustedSize = *((vm_size_t *)( (vm_address_t) address - - sizeof(vm_address_t) - sizeof(vm_size_t))); - allocationAddress = *((vm_address_t *)( (vm_address_t) address - - sizeof(vm_address_t) )); - - kfree((void *)allocationAddress, adjustedSize); + else + { + IOKernelFreeContiguous((mach_vm_address_t) address, size); } - -#if IOALLOCDEBUG - debug_iomalloc_size -= size; -#endif } /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ @@ -595,7 +632,7 @@ IOReturn IOSetProcessorCacheMode( task_t task, IOVirtualAddress address, pagenum = pmap_find_phys(kernel_pmap, (addr64_t)address); if( 
pagenum) { ret = IOUnmapPages( get_task_map(task), address, page_size ); - ret = IOMapPages( get_task_map(task), address, pagenum << PAGE_SHIFT, page_size, cacheMode ); + ret = IOMapPages( get_task_map(task), address, ptoa_64(pagenum), page_size, cacheMode ); } else ret = kIOReturnVMError; @@ -718,3 +755,5 @@ unsigned int IOAlignmentToSize(IOAlignment align) } return size; } + +} /* extern "C" */ diff --git a/iokit/Kernel/IOMapper.cpp b/iokit/Kernel/IOMapper.cpp index f9be25915..f2f0633c3 100644 --- a/iokit/Kernel/IOMapper.cpp +++ b/iokit/Kernel/IOMapper.cpp @@ -23,10 +23,16 @@ #include #include +#include "IOCopyMapper.h" + +__BEGIN_DECLS +extern ppnum_t pmap_find_phys(pmap_t pmap, addr64_t va); +__END_DECLS + #define super IOService OSDefineMetaClassAndAbstractStructors(IOMapper, IOService); -OSMetaClassDefineReservedUnused(IOMapper, 0); +OSMetaClassDefineReservedUsed(IOMapper, 0); OSMetaClassDefineReservedUnused(IOMapper, 1); OSMetaClassDefineReservedUnused(IOMapper, 2); OSMetaClassDefineReservedUnused(IOMapper, 3); @@ -130,98 +136,44 @@ void IOMapper::iovmInsert(ppnum_t addr, IOItemCount offset, iovmInsert(addr, offset + i, pageList[i].phys_addr); } -struct ARTTableData { - void *v; - upl_t u[0]; -}; -#define getARTDataP(data) ((ARTTableData *) (data)->getBytesNoCopy()) - -OSData * -IOMapper::NewARTTable(IOByteCount size, - void ** virtAddrP, ppnum_t *physAddrP) +OSData * IOMapper:: +NewARTTable(IOByteCount size, void ** virtAddrP, ppnum_t *physAddrP) { - OSData *ret; + if (!virtAddrP || !physAddrP) + return 0; + kern_return_t kr; - vm_address_t startUpl; - ARTTableData *dataP; - unsigned int dataSize; - upl_page_info_t *pl = 0; + vm_address_t address; - // Each UPL can deal with about one meg at the moment size = round_page_32(size); - dataSize = sizeof(ARTTableData) + sizeof(upl_t) * size / (1024 * 1024); - ret = OSData::withCapacity(dataSize); - if (!ret) + kr = kmem_alloc_contig(kernel_map, &address, size, PAGE_MASK, 0); + if (kr) return 0; - // Append 0's 
to the buffer, in-other-words reset to nulls. - ret->appendBytes(NULL, sizeof(ARTTableData)); - dataP = getARTDataP(ret); + ppnum_t pagenum = pmap_find_phys(kernel_pmap, (addr64_t) address); + if (pagenum) + *physAddrP = pagenum; + else { + FreeARTTable((OSData *) address, size); + address = 0; + } - kr = kmem_alloc_contig(kernel_map, &startUpl, size, PAGE_MASK, 0); - if (kr) - return 0; + *virtAddrP = (void *) address; - dataP->v = (void *) startUpl; - - do { - upl_t iopl; - int upl_flags = UPL_SET_INTERNAL | UPL_SET_LITE - | UPL_SET_IO_WIRE | UPL_COPYOUT_FROM; - vm_size_t iopl_size = size; - - kr = vm_map_get_upl(kernel_map, - (vm_map_offset_t)startUpl, - &iopl_size, - &iopl, - 0, - 0, - &upl_flags, - 0); - if (kr) { - panic("IOMapper:vm_map_get_upl returned 0x%x\n"); - goto bail; - } - - if (!ret->appendBytes(&iopl, sizeof(upl_t))) - goto bail; - - startUpl += iopl_size; - size -= iopl_size; - } while(size); - - // Need to re-establish the dataP as the OSData may have grown. - dataP = getARTDataP(ret); - - // Now grab the page entry of the first page and get its phys addr - pl = UPL_GET_INTERNAL_PAGE_LIST(dataP->u[0]); - *physAddrP = pl->phys_addr; - *virtAddrP = dataP->v; - - return ret; - -bail: - FreeARTTable(ret, size); - return 0; + return (OSData *) address; } void IOMapper::FreeARTTable(OSData *artHandle, IOByteCount size) { - assert(artHandle); - - ARTTableData *dataP = getARTDataP(artHandle); + vm_address_t address = (vm_address_t) artHandle; - int numupls = ((artHandle->getLength() - sizeof(*dataP)) / sizeof(upl_t)); - for (int i = 0; i < numupls; i++) { - upl_abort(dataP->u[i], 0); - upl_deallocate(dataP->u[i]); - } + size = round_page_32(size); + kmem_free(kernel_map, address, size); // Just panic if address is 0 +} - if (dataP->v) { - size = round_page_32(size); - kmem_free(kernel_map, (vm_address_t) dataP->v, size); - } - artHandle->release(); +bool IOMapper::getBypassMask(addr64_t *maskP) const +{ + return false; } __BEGIN_DECLS @@ -385,4 +337,24 
@@ void IOMappedWrite64(IOPhysicalAddress address, UInt64 value) ml_phys_write_double((vm_offset_t) address, value); } +mach_vm_address_t IOMallocPhysical(mach_vm_size_t size, mach_vm_address_t mask) +{ + mach_vm_address_t address = 0; + if (gIOCopyMapper) + { + address = ptoa_64(gIOCopyMapper->iovmAlloc(atop_64(round_page(size)))); + } + + return (address); +} + +void IOFreePhysical(mach_vm_address_t address, mach_vm_size_t size) +{ + if (gIOCopyMapper) + { + gIOCopyMapper->iovmFree(atop_64(address), atop_64(round_page(size))); + } +} + + __END_DECLS diff --git a/iokit/Kernel/IOMemoryDescriptor.cpp b/iokit/Kernel/IOMemoryDescriptor.cpp index 389d75a1e..fdc02ad8d 100644 --- a/iokit/Kernel/IOMemoryDescriptor.cpp +++ b/iokit/Kernel/IOMemoryDescriptor.cpp @@ -38,6 +38,7 @@ #include #include "IOKitKernelInternal.h" +#include "IOCopyMapper.h" #include #include @@ -54,11 +55,8 @@ __BEGIN_DECLS #include #include -#ifndef i386 #include #include -struct phys_entry *pmap_find_physentry(ppnum_t pa); -#endif extern ppnum_t pmap_find_phys(pmap_t pmap, addr64_t va); void ipc_port_release_send(ipc_port_t port); @@ -97,9 +95,14 @@ __END_DECLS #define kIOMaximumMappedIOByteCount (512*1024*1024) -static IOMapper * gIOSystemMapper; +static IOMapper * gIOSystemMapper = NULL; + +IOCopyMapper * gIOCopyMapper = NULL; + static ppnum_t gIOMaximumMappedIOPageCount = atop_32(kIOMaximumMappedIOByteCount); +ppnum_t gIOLastPage; + /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ OSDefineMetaClassAndAbstractStructors( IOMemoryDescriptor, OSObject ) @@ -118,6 +121,12 @@ static IORecursiveLock * gIOMemoryLock; #define WAKEUP \ IORecursiveLockWakeup( gIOMemoryLock, (void *)this, /* one-thread */ false) +#if 0 +#define DEBG(fmt, args...) { kprintf(fmt, ## args); } +#else +#define DEBG(fmt, args...) 
{} +#endif + /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ class _IOMemoryMap : public IOMemoryMap @@ -288,15 +297,21 @@ kern_return_t device_close( // This means that pointers are not passed and NULLs don't have to be // checked for as a NULL reference is illegal. static inline void -getAddrLenForInd(user_addr_t &addr, IOPhysicalLength &len, // Output variables +getAddrLenForInd(addr64_t &addr, IOPhysicalLength &len, // Output variables UInt32 type, IOGeneralMemoryDescriptor::Ranges r, UInt32 ind) { - assert(kIOMemoryTypePhysical == type || kIOMemoryTypeUIO == type - || kIOMemoryTypeVirtual == type); + assert(kIOMemoryTypeUIO == type + || kIOMemoryTypeVirtual == type || kIOMemoryTypeVirtual64 == type + || kIOMemoryTypePhysical == type || kIOMemoryTypePhysical64 == type); if (kIOMemoryTypeUIO == type) { user_size_t us; uio_getiov((uio_t) r.uio, ind, &addr, &us); len = us; } + else if ((kIOMemoryTypeVirtual64 == type) || (kIOMemoryTypePhysical64 == type)) { + IOAddressRange cur = r.v64[ind]; + addr = cur.address; + len = cur.length; + } else { IOVirtualRange cur = r.v[ind]; addr = cur.address; @@ -328,6 +343,15 @@ IOMemoryDescriptor::withAddress(vm_address_t address, IODirection direction, task_t task) { +#if TEST_V64 + if (task) + { + IOOptionBits options = (IOOptionBits) direction; + if (task == kernel_task) + options |= kIOMemoryAutoPrepare; + return (IOMemoryDescriptor::withAddressRange(address, length, options, task)); + } +#endif IOGeneralMemoryDescriptor * that = new IOGeneralMemoryDescriptor; if (that) { @@ -345,6 +369,9 @@ IOMemoryDescriptor::withPhysicalAddress( IOByteCount length, IODirection direction ) { +#if TEST_P64 + return (IOMemoryDescriptor::withAddressRange(address, length, (IOOptionBits) direction, NULL)); +#endif IOGeneralMemoryDescriptor *self = new IOGeneralMemoryDescriptor; if (self && !self->initWithPhysicalAddress(address, length, direction)) { @@ -373,6 +400,39 @@ IOMemoryDescriptor::withRanges( 
IOVirtualRange * ranges, return 0; } +IOMemoryDescriptor * +IOMemoryDescriptor::withAddressRange(mach_vm_address_t address, + mach_vm_size_t length, + IOOptionBits options, + task_t task) +{ + IOAddressRange range = { address, length }; + return (IOMemoryDescriptor::withAddressRanges(&range, 1, options, task)); +} + +IOMemoryDescriptor * +IOMemoryDescriptor::withAddressRanges(IOAddressRange * ranges, + UInt32 rangeCount, + IOOptionBits options, + task_t task) +{ + IOGeneralMemoryDescriptor * that = new IOGeneralMemoryDescriptor; + if (that) + { + if (task) + options |= kIOMemoryTypeVirtual64; + else + options |= kIOMemoryTypePhysical64; + + if (that->initWithOptions(ranges, rangeCount, 0, task, options, /* mapper */ 0)) + return that; + + that->release(); + } + + return 0; +} + /* * withRanges: @@ -447,8 +507,8 @@ IOMemoryDescriptor::withSubRange(IOMemoryDescriptor * of, return self; } -IOMemoryDescriptor * IOMemoryDescriptor:: - withPersistentMemoryDescriptor(IOMemoryDescriptor *originalMD) +IOMemoryDescriptor * +IOMemoryDescriptor::withPersistentMemoryDescriptor(IOMemoryDescriptor *originalMD) { IOGeneralMemoryDescriptor *origGenMD = OSDynamicCast(IOGeneralMemoryDescriptor, originalMD); @@ -460,8 +520,8 @@ IOMemoryDescriptor * IOMemoryDescriptor:: return 0; } -IOMemoryDescriptor * IOGeneralMemoryDescriptor:: - withPersistentMemoryDescriptor(IOGeneralMemoryDescriptor *originalMD) +IOMemoryDescriptor * +IOGeneralMemoryDescriptor::withPersistentMemoryDescriptor(IOGeneralMemoryDescriptor *originalMD) { ipc_port_t sharedMem = (ipc_port_t) originalMD->createNamedEntry(); @@ -671,6 +731,7 @@ IOGeneralMemoryDescriptor::initWithOptions(void * buffers, switch (type) { case kIOMemoryTypeUIO: case kIOMemoryTypeVirtual: + case kIOMemoryTypeVirtual64: assert(task); if (!task) return false; @@ -678,6 +739,7 @@ IOGeneralMemoryDescriptor::initWithOptions(void * buffers, break; case kIOMemoryTypePhysical: // Neither Physical nor UPL should have a task + case 
kIOMemoryTypePhysical64: mapper = kIOMapperNone; case kIOMemoryTypeUPL: @@ -704,10 +766,15 @@ IOGeneralMemoryDescriptor::initWithOptions(void * buffers, while (_wireCount) complete(); - if (_kernPtrAligned) - unmapFromKernel(); if (_ranges.v && _rangesIsAllocated) - IODelete(_ranges.v, IOVirtualRange, _rangesCount); + { + if (kIOMemoryTypeUIO == type) + uio_free((uio_t) _ranges.v); + else if ((kIOMemoryTypeVirtual64 == type) || (kIOMemoryTypePhysical64 == type)) + IODelete(_ranges.v64, IOAddressRange, _rangesCount); + else + IODelete(_ranges.v, IOVirtualRange, _rangesCount); + } if (_memEntry) { ipc_port_release_send((ipc_port_t) _memEntry); _memEntry = 0; } } @@ -720,7 +787,7 @@ IOGeneralMemoryDescriptor::initWithOptions(void * buffers, // Grab the appropriate mapper if (mapper == kIOMapperNone) mapper = 0; // No Mapper - else if (!mapper) { + else if (mapper == kIOMapperSystem) { IOMapper::checkForSystemMapper(); gIOSystemMapper = mapper = IOMapper::gSystem; } @@ -732,10 +799,12 @@ IOGeneralMemoryDescriptor::initWithOptions(void * buffers, // DEPRECATED variable initialisation _direction = (IODirection) (_flags & kIOMemoryDirectionMask); - _position = 0; - _kernPtrAligned = 0; - _cachedPhysicalAddress = 0; - _cachedVirtualAddress = 0; + + __iomd_reservedA = 0; + __iomd_reservedB = 0; + __iomd_reservedC = 0; + + _highestPage = 0; if (kIOMemoryTypeUPL == type) { @@ -755,7 +824,7 @@ IOGeneralMemoryDescriptor::initWithOptions(void * buffers, dataP->fMapper = mapper; dataP->fPageCnt = 0; - _wireCount++; // UPLs start out life wired + // _wireCount++; // UPLs start out life wired _length = count; _pages += atop_32(offset + count + PAGE_MASK) - atop_32(offset); @@ -767,6 +836,9 @@ IOGeneralMemoryDescriptor::initWithOptions(void * buffers, // Set the flag kIOPLOnDevice convieniently equal to 1 iopl.fFlags = pageList->device | kIOPLExternUPL; iopl.fIOMDOffset = 0; + + _highestPage = upl_get_highest_page(iopl.fIOPL); + if (!pageList->device) { // Pre-compute the offset 
into the UPL's page list pageList = &pageList[atop_32(offset)]; @@ -786,7 +858,8 @@ IOGeneralMemoryDescriptor::initWithOptions(void * buffers, _memoryEntries->appendBytes(&iopl, sizeof(iopl)); } else { - // kIOMemoryTypeVirtual | kIOMemoryTypeUIO | kIOMemoryTypePhysical + // kIOMemoryTypeVirtual | kIOMemoryTypeVirtual64 | kIOMemoryTypeUIO + // kIOMemoryTypePhysical | kIOMemoryTypePhysical64 // Initialize the memory descriptor if (options & kIOMemoryAsReference) { @@ -799,13 +872,27 @@ IOGeneralMemoryDescriptor::initWithOptions(void * buffers, _ranges.v = (IOVirtualRange *) buffers; } else { - assert(kIOMemoryTypeUIO != type); - _rangesIsAllocated = true; - _ranges.v = IONew(IOVirtualRange, count); - if (!_ranges.v) - return false; - bcopy(buffers, _ranges.v, count * sizeof(IOVirtualRange)); + switch (_flags & kIOMemoryTypeMask) + { + case kIOMemoryTypeUIO: + _ranges.v = (IOVirtualRange *) uio_duplicate((uio_t) buffers); + break; + + case kIOMemoryTypeVirtual64: + case kIOMemoryTypePhysical64: + _ranges.v64 = IONew(IOAddressRange, count); + if (!_ranges.v64) + return false; + bcopy(buffers, _ranges.v, count * sizeof(IOAddressRange)); + break; + case kIOMemoryTypeVirtual: + _ranges.v = IONew(IOVirtualRange, count); + if (!_ranges.v) + return false; + bcopy(buffers, _ranges.v, count * sizeof(IOVirtualRange)); + break; + } } // Find starting address within the vector of ranges @@ -820,8 +907,15 @@ IOGeneralMemoryDescriptor::initWithOptions(void * buffers, getAddrLenForInd(addr, len, type, vec, ind); pages += (atop_64(addr + len + PAGE_MASK) - atop_64(addr)); len += length; - assert(len > length); // Check for 32 bit wrap around + assert(len >= length); // Check for 32 bit wrap around length = len; + + if ((kIOMemoryTypePhysical == type) || (kIOMemoryTypePhysical64 == type)) + { + ppnum_t highPage = atop_64(addr + len - 1); + if (highPage > _highestPage) + _highestPage = highPage; + } } _length = length; _pages = pages; @@ -829,9 +923,9 @@ 
IOGeneralMemoryDescriptor::initWithOptions(void * buffers, // Auto-prepare memory at creation time. // Implied completion when descriptor is free-ed - if (kIOMemoryTypePhysical == type) + if ((kIOMemoryTypePhysical == type) || (kIOMemoryTypePhysical64 == type)) _wireCount++; // Physical MDs are, by definition, wired - else { /* kIOMemoryTypeVirtual | kIOMemoryTypeUIO */ + else { /* kIOMemoryTypeVirtual | kIOMemoryTypeVirtual64 | kIOMemoryTypeUIO */ ioGMDData *dataP; unsigned dataSize = computeDataSize(_pages, /* upls */ count * 2); @@ -877,10 +971,16 @@ void IOGeneralMemoryDescriptor::free() if (_memoryEntries) _memoryEntries->release(); - if (_kernPtrAligned) - unmapFromKernel(); if (_ranges.v && _rangesIsAllocated) - IODelete(_ranges.v, IOVirtualRange, _rangesCount); + { + IOOptionBits type = _flags & kIOMemoryTypeMask; + if (kIOMemoryTypeUIO == type) + uio_free((uio_t) _ranges.v); + else if ((kIOMemoryTypeVirtual64 == type) || (kIOMemoryTypePhysical64 == type)) + IODelete(_ranges.v64, IOAddressRange, _rangesCount); + else + IODelete(_ranges.v, IOVirtualRange, _rangesCount); + } if (reserved && reserved->devicePager) device_pager_deallocate( (memory_object_t) reserved->devicePager ); @@ -934,17 +1034,17 @@ IOOptionBits IOMemoryDescriptor::getTag( void ) } // @@@ gvdl: who is using this API? Seems like a wierd thing to implement. 
-IOPhysicalAddress IOMemoryDescriptor::getSourceSegment( IOByteCount offset, - IOByteCount * length ) +IOPhysicalAddress +IOMemoryDescriptor::getSourceSegment( IOByteCount offset, IOByteCount * length ) { - IOPhysicalAddress physAddr = 0; + addr64_t physAddr = 0; if( prepare() == kIOReturnSuccess) { - physAddr = getPhysicalSegment( offset, length ); + physAddr = getPhysicalSegment64( offset, length ); complete(); } - return( physAddr ); + return( (IOPhysicalAddress) physAddr ); // truncated but only page offset is used } IOByteCount IOMemoryDescriptor::readBytes @@ -1038,137 +1138,247 @@ extern "C" unsigned int IODefaultCacheBits(addr64_t pa); panic("IOGMD::setPosition deprecated"); /* DEPRECATED */ } -IOPhysicalAddress IOGeneralMemoryDescriptor::getPhysicalSegment - (IOByteCount offset, IOByteCount *lengthOfSegment) +IOReturn IOGeneralMemoryDescriptor::dmaCommandOperation(DMACommandOps op, void *vData, UInt dataSize) const { - IOPhysicalAddress address = 0; - IOPhysicalLength length = 0; + if (kIOMDGetCharacteristics == op) { -// assert(offset <= _length); - if (offset < _length) // (within bounds?) 
- { - if ( (_flags & kIOMemoryTypeMask) == kIOMemoryTypePhysical) { - unsigned int ind; + if (dataSize < sizeof(IOMDDMACharacteristics)) + return kIOReturnUnderrun; - // Physical address based memory descriptor + IOMDDMACharacteristics *data = (IOMDDMACharacteristics *) vData; + data->fLength = _length; + data->fSGCount = _rangesCount; + data->fPages = _pages; + data->fDirection = _direction; + if (!_wireCount) + data->fIsPrepared = false; + else { + data->fIsPrepared = true; + data->fHighestPage = _highestPage; + if (_memoryEntries) { + ioGMDData *gmdData = getDataP(_memoryEntries); + ioPLBlock *ioplList = getIOPLList(gmdData); + UInt count = getNumIOPL(_memoryEntries, gmdData); + + data->fIsMapped = (gmdData->fMapper && _pages && (count > 0) + && ioplList[0].fMappedBase); + if (count == 1) + data->fPageAlign = (ioplList[0].fPageOffset & PAGE_MASK) | ~PAGE_MASK; + } + else + data->fIsMapped = false; + } - // Find offset within descriptor and make it relative - // to the current _range. - for (ind = 0 ; offset >= _ranges.p[ind].length; ind++ ) - offset -= _ranges.p[ind].length; - - IOPhysicalRange cur = _ranges.p[ind]; - address = cur.address + offset; - length = cur.length - offset; - - // see how far we can coalesce ranges - for (++ind; ind < _rangesCount; ind++) { - cur = _ranges.p[ind]; - - if (address + length != cur.address) - break; - - length += cur.length; - } + return kIOReturnSuccess; + } + else if (!(kIOMDWalkSegments & op)) + return kIOReturnBadArgument; + + // Get the next segment + struct InternalState { + IOMDDMAWalkSegmentArgs fIO; + UInt fOffset2Index; + UInt fIndex; + UInt fNextOffset; + } *isP; + + // Find the next segment + if (dataSize < sizeof(*isP)) + return kIOReturnUnderrun; + + isP = (InternalState *) vData; + UInt offset = isP->fIO.fOffset; + bool mapped = isP->fIO.fMapped; + + if (offset >= _length) + return (offset == _length)? 
kIOReturnOverrun : kIOReturnInternalError; + + // Validate the previous offset + UInt ind, off2Ind = isP->fOffset2Index; + if ((kIOMDFirstSegment != op) + && offset + && (offset == isP->fNextOffset || off2Ind <= offset)) + ind = isP->fIndex; + else + ind = off2Ind = 0; // Start from beginning - // @@@ gvdl: should be assert(address); - // but can't as NVidia GeForce creates a bogus physical mem - assert(address - || /* nvidia */ (!_ranges.p[0].address && 1 == _rangesCount)); - assert(length); - } - else do { - // We need wiring & we are wired. - assert(_wireCount); + UInt length; + UInt64 address; + if ( (_flags & kIOMemoryTypeMask) == kIOMemoryTypePhysical) { - if (!_wireCount) - { - panic("IOGMD: not wired for getPhysicalSegment()"); - continue; - } + // Physical address based memory descriptor + const IOPhysicalRange *physP = (IOPhysicalRange *) &_ranges.p[0]; - assert(_memoryEntries); + // Find the range after the one that contains the offset + UInt len; + for (len = 0; off2Ind <= offset; ind++) { + len = physP[ind].length; + off2Ind += len; + } - ioGMDData * dataP = getDataP(_memoryEntries); - const ioPLBlock *ioplList = getIOPLList(dataP); - UInt ind, numIOPLs = getNumIOPL(_memoryEntries, dataP); - upl_page_info_t *pageList = getPageList(dataP); + // Calculate length within range and starting address + length = off2Ind - offset; + address = physP[ind - 1].address + len - length; - assert(numIOPLs > 0); + // see how far we can coalesce ranges + while (ind < _rangesCount && address + length == physP[ind].address) { + len = physP[ind].length; + length += len; + off2Ind += len; + ind++; + } - // Scan through iopl info blocks looking for block containing offset - for (ind = 1; ind < numIOPLs; ind++) { - if (offset < ioplList[ind].fIOMDOffset) - break; - } + // correct contiguous check overshoot + ind--; + off2Ind -= len; + } + else if ( (_flags & kIOMemoryTypeMask) == kIOMemoryTypePhysical64) { - // Go back to actual range as search goes past it - ioPLBlock 
ioplInfo = ioplList[ind - 1]; + // Physical address based memory descriptor + const IOAddressRange *physP = (IOAddressRange *) &_ranges.v64[0]; - if (ind < numIOPLs) - length = ioplList[ind].fIOMDOffset; - else - length = _length; - length -= offset; // Remainder within iopl + // Find the range after the one that contains the offset + mach_vm_size_t len; + for (len = 0; off2Ind <= offset; ind++) { + len = physP[ind].length; + off2Ind += len; + } - // Subtract offset till this iopl in total list - offset -= ioplInfo.fIOMDOffset; + // Calculate length within range and starting address + length = off2Ind - offset; + address = physP[ind - 1].address + len - length; - // This is a mapped IOPL so we just need to compute an offset - // relative to the mapped base. - if (ioplInfo.fMappedBase) { - offset += (ioplInfo.fPageOffset & PAGE_MASK); - address = ptoa_32(ioplInfo.fMappedBase) + offset; - continue; - } + // see how far we can coalesce ranges + while (ind < _rangesCount && address + length == physP[ind].address) { + len = physP[ind].length; + length += len; + off2Ind += len; + ind++; + } + + // correct contiguous check overshoot + ind--; + off2Ind -= len; + } + else do { + if (!_wireCount) + panic("IOGMD: not wired for the IODMACommand"); - // Currently the offset is rebased into the current iopl. - // Now add the iopl 1st page offset. - offset += ioplInfo.fPageOffset; + assert(_memoryEntries); - // For external UPLs the fPageInfo field points directly to - // the upl's upl_page_info_t array. 
- if (ioplInfo.fFlags & kIOPLExternUPL) - pageList = (upl_page_info_t *) ioplInfo.fPageInfo; - else - pageList = &pageList[ioplInfo.fPageInfo]; + ioGMDData * dataP = getDataP(_memoryEntries); + const ioPLBlock *ioplList = getIOPLList(dataP); + UInt numIOPLs = getNumIOPL(_memoryEntries, dataP); + upl_page_info_t *pageList = getPageList(dataP); - // Check for direct device non-paged memory - if ( ioplInfo.fFlags & kIOPLOnDevice ) { - address = ptoa_32(pageList->phys_addr) + offset; - continue; - } + assert(numIOPLs > 0); - // Now we need compute the index into the pageList - ind = atop_32(offset); - offset &= PAGE_MASK; + // Scan through iopl info blocks looking for block containing offset + while (ind < numIOPLs && offset >= ioplList[ind].fIOMDOffset) + ind++; - IOPhysicalAddress pageAddr = pageList[ind].phys_addr; - address = ptoa_32(pageAddr) + offset; - - // Check for the remaining data in this upl being longer than the - // remainder on the current page. This should be checked for - // contiguous pages - if (length > PAGE_SIZE - offset) { - // See if the next page is contiguous. Stop looking when we hit - // the end of this upl, which is indicated by the - // contigLength >= length. - IOByteCount contigLength = PAGE_SIZE - offset; - - // Look for contiguous segment - while (contigLength < length - && ++pageAddr == pageList[++ind].phys_addr) { - contigLength += PAGE_SIZE; - } - if (length > contigLength) - length = contigLength; - } - - assert(address); - assert(length); + // Go back to actual range as search goes past it + ioPLBlock ioplInfo = ioplList[ind - 1]; + off2Ind = ioplInfo.fIOMDOffset; + + if (ind < numIOPLs) + length = ioplList[ind].fIOMDOffset; + else + length = _length; + length -= offset; // Remainder within iopl + + // Subtract offset till this iopl in total list + offset -= off2Ind; + + // If a mapped address is requested and this is a pre-mapped IOPL + // then just need to compute an offset relative to the mapped base. 
+ if (mapped && ioplInfo.fMappedBase) { + offset += (ioplInfo.fPageOffset & PAGE_MASK); + address = ptoa_64(ioplInfo.fMappedBase) + offset; + continue; // Done leave do/while(false) now + } + + // The offset is rebased into the current iopl. + // Now add the iopl 1st page offset. + offset += ioplInfo.fPageOffset; + + // For external UPLs the fPageInfo field points directly to + // the upl's upl_page_info_t array. + if (ioplInfo.fFlags & kIOPLExternUPL) + pageList = (upl_page_info_t *) ioplInfo.fPageInfo; + else + pageList = &pageList[ioplInfo.fPageInfo]; + + // Check for direct device non-paged memory + if ( ioplInfo.fFlags & kIOPLOnDevice ) { + address = ptoa_64(pageList->phys_addr) + offset; + continue; // Done leave do/while(false) now + } - } while (0); + // Now we need compute the index into the pageList + UInt pageInd = atop_32(offset); + offset &= PAGE_MASK; + + // Compute the starting address of this segment + IOPhysicalAddress pageAddr = pageList[pageInd].phys_addr; + address = ptoa_64(pageAddr) + offset; + + // length is currently set to the length of the remainider of the iopl. + // We need to check that the remainder of the iopl is contiguous. + // This is indicated by pageList[ind].phys_addr being sequential. + IOByteCount contigLength = PAGE_SIZE - offset; + while (contigLength < length + && ++pageAddr == pageList[++pageInd].phys_addr) + { + contigLength += PAGE_SIZE; + } + + if (contigLength < length) + length = contigLength; + + + assert(address); + assert(length); + + } while (false); + + // Update return values and state + isP->fIO.fIOVMAddr = address; + isP->fIO.fLength = length; + isP->fIndex = ind; + isP->fOffset2Index = off2Ind; + isP->fNextOffset = isP->fIO.fOffset + length; + + return kIOReturnSuccess; +} + +addr64_t +IOGeneralMemoryDescriptor::getPhysicalSegment64(IOByteCount offset, IOByteCount *lengthOfSegment) +{ + IOReturn ret; + IOByteCount length = 0; + addr64_t address = 0; + if (offset < _length) // (within bounds?) 
+ { + IOMDDMAWalkSegmentState _state; + IOMDDMAWalkSegmentArgs * state = (IOMDDMAWalkSegmentArgs *) &_state; + + state->fOffset = offset; + state->fLength = _length - offset; + state->fMapped = false; + + ret = dmaCommandOperation(kIOMDFirstSegment, _state, sizeof(_state)); + + if ((kIOReturnSuccess != ret) && (kIOReturnOverrun != ret)) + DEBG("getPhysicalSegment64 dmaCommandOperation(%lx), %p, offset %qx, addr %qx, len %qx\n", + ret, this, state->fOffset, + state->fIOVMAddr, state->fLength); + if (kIOReturnSuccess == ret) + { + address = state->fIOVMAddr; + length = state->fLength; + } if (!address) length = 0; } @@ -1176,29 +1386,80 @@ IOPhysicalAddress IOGeneralMemoryDescriptor::getPhysicalSegment if (lengthOfSegment) *lengthOfSegment = length; - return address; + return (address); +} + +IOPhysicalAddress +IOGeneralMemoryDescriptor::getPhysicalSegment(IOByteCount offset, IOByteCount *lengthOfSegment) +{ + IOReturn ret; + IOByteCount length = 0; + addr64_t address = 0; + +// assert(offset <= _length); + + if (offset < _length) // (within bounds?) 
+ { + IOMDDMAWalkSegmentState _state; + IOMDDMAWalkSegmentArgs * state = (IOMDDMAWalkSegmentArgs *) &_state; + + state->fOffset = offset; + state->fLength = _length - offset; + state->fMapped = true; + + ret = dmaCommandOperation( + kIOMDFirstSegment, _state, sizeof(_state)); + + if ((kIOReturnSuccess != ret) && (kIOReturnOverrun != ret)) + DEBG("getPhysicalSegment dmaCommandOperation(%lx), %p, offset %qx, addr %qx, len %qx\n", + ret, this, state->fOffset, + state->fIOVMAddr, state->fLength); + if (kIOReturnSuccess == ret) + { + address = state->fIOVMAddr; + length = state->fLength; + } + + if (!address) + length = 0; + } + + if ((address + length) > 0x100000000ULL) + { + panic("getPhysicalSegment() out of 32b range 0x%qx, len 0x%x, class %s", + address, length, (getMetaClass())->getClassName()); + } + + if (lengthOfSegment) + *lengthOfSegment = length; + + return ((IOPhysicalAddress) address); } -addr64_t IOMemoryDescriptor::getPhysicalSegment64 - (IOByteCount offset, IOByteCount *lengthOfSegment) +addr64_t +IOMemoryDescriptor::getPhysicalSegment64(IOByteCount offset, IOByteCount *lengthOfSegment) { IOPhysicalAddress phys32; IOByteCount length; addr64_t phys64; + IOMapper * mapper = 0; phys32 = getPhysicalSegment(offset, lengthOfSegment); if (!phys32) return 0; if (gIOSystemMapper) + mapper = gIOSystemMapper; + + if (mapper) { IOByteCount origLen; - phys64 = gIOSystemMapper->mapAddr(phys32); + phys64 = mapper->mapAddr(phys32); origLen = *lengthOfSegment; length = page_size - (phys64 & (page_size - 1)); while ((length < origLen) - && ((phys64 + length) == gIOSystemMapper->mapAddr(phys32 + length))) + && ((phys64 + length) == mapper->mapAddr(phys32 + length))) length += page_size; if (length > origLen) length = origLen; @@ -1211,8 +1472,8 @@ addr64_t IOMemoryDescriptor::getPhysicalSegment64 return phys64; } -IOPhysicalAddress IOGeneralMemoryDescriptor:: -getSourceSegment(IOByteCount offset, IOByteCount *lengthOfSegment) +IOPhysicalAddress 
+IOGeneralMemoryDescriptor::getSourceSegment(IOByteCount offset, IOByteCount *lengthOfSegment) { IOPhysicalAddress address = 0; IOPhysicalLength length = 0; @@ -1277,6 +1538,42 @@ getSourceSegment(IOByteCount offset, IOByteCount *lengthOfSegment) +IOReturn +IOMemoryDescriptor::dmaCommandOperation(DMACommandOps op, void *vData, UInt dataSize) const +{ + if (kIOMDGetCharacteristics == op) { + if (dataSize < sizeof(IOMDDMACharacteristics)) + return kIOReturnUnderrun; + + IOMDDMACharacteristics *data = (IOMDDMACharacteristics *) vData; + data->fLength = getLength(); + data->fSGCount = 0; + data->fDirection = _direction; + if (IOMapper::gSystem) + data->fIsMapped = true; + data->fIsPrepared = true; // Assume prepared - fails safe + } + else if (kIOMDWalkSegments & op) { + if (dataSize < sizeof(IOMDDMAWalkSegmentArgs)) + return kIOReturnUnderrun; + + IOMDDMAWalkSegmentArgs *data = (IOMDDMAWalkSegmentArgs *) vData; + IOByteCount offset = (IOByteCount) data->fOffset; + + IOPhysicalLength length; + IOMemoryDescriptor *ncmd = const_cast(this); + if (data->fMapped && IOMapper::gSystem) + data->fIOVMAddr = ncmd->getPhysicalSegment(offset, &length); + else + data->fIOVMAddr = ncmd->getPhysicalSegment64(offset, &length); + data->fLength = length; + } + else + return kIOReturnBadArgument; + + return kIOReturnSuccess; +} + IOReturn IOMemoryDescriptor::setPurgeable( IOOptionBits newState, IOOptionBits * oldState ) { @@ -1408,10 +1705,12 @@ io_get_kernel_static_upl( vm_size_t *upl_size, upl_t *upl, upl_page_info_array_t page_list, - unsigned int *count) + unsigned int *count, + ppnum_t *highest_page) { unsigned int pageCount, page; ppnum_t phys; + ppnum_t highestPage = 0; pageCount = atop_32(*upl_size); if (pageCount > *count) @@ -1430,8 +1729,12 @@ io_get_kernel_static_upl( page_list[page].dirty = 0; page_list[page].precious = 0; page_list[page].device = 0; + if (phys > highestPage) + highestPage = page; } + *highest_page = highestPage; + return ((page >= pageCount) ? 
kIOReturnSuccess : kIOReturnVMError); } @@ -1445,7 +1748,7 @@ IOReturn IOGeneralMemoryDescriptor::wireVirtual(IODirection forDirection) ipc_port_t sharedMem = (ipc_port_t) _memEntry; assert(!_wireCount); - assert(kIOMemoryTypeVirtual == type || kIOMemoryTypeUIO == type); + assert(kIOMemoryTypeVirtual == type || kIOMemoryTypeVirtual64 == type || kIOMemoryTypeUIO == type); if (_pages >= gIOMaximumMappedIOPageCount) return kIOReturnNoResources; @@ -1464,7 +1767,7 @@ IOReturn IOGeneralMemoryDescriptor::wireVirtual(IODirection forDirection) forDirection = _direction; int uplFlags; // This Mem Desc's default flags for upl creation - switch (forDirection) + switch (kIODirectionOutIn & forDirection) { case kIODirectionOut: // Pages do not need to be marked as dirty on commit @@ -1479,6 +1782,11 @@ IOReturn IOGeneralMemoryDescriptor::wireVirtual(IODirection forDirection) } uplFlags |= UPL_SET_IO_WIRE | UPL_SET_LITE; +#ifdef UPL_NEED_32BIT_ADDR + if (kIODirectionPrepareToPhys32 & forDirection) + uplFlags |= UPL_NEED_32BIT_ADDR; +#endif + // Find the appropriate vm_map for the given task vm_map_t curMap; if (_task == kernel_task && (kIOMemoryBufferPageable & _flags)) @@ -1490,10 +1798,12 @@ IOReturn IOGeneralMemoryDescriptor::wireVirtual(IODirection forDirection) Ranges vec = _ranges; unsigned int pageIndex = 0; IOByteCount mdOffset = 0; + ppnum_t highestPage = 0; for (UInt range = 0; range < _rangesCount; range++) { ioPLBlock iopl; user_addr_t startPage; IOByteCount numBytes; + ppnum_t highPage = 0; // Get the startPage address and length of vec[range] getAddrLenForInd(startPage, numBytes, type, vec, range); @@ -1533,7 +1843,8 @@ IOReturn IOGeneralMemoryDescriptor::wireVirtual(IODirection forDirection) &ioplSize, &iopl.fIOPL, baseInfo, - &numPageInfo); + &numPageInfo, + &highPage); } else if (sharedMem) { error = memory_object_iopl_request(sharedMem, @@ -1559,6 +1870,11 @@ IOReturn IOGeneralMemoryDescriptor::wireVirtual(IODirection forDirection) if (error != KERN_SUCCESS) 
goto abortExit; + if (iopl.fIOPL) + highPage = upl_get_highest_page(iopl.fIOPL); + if (highPage > highestPage) + highestPage = highPage; + error = kIOReturnNoMemory; if (baseInfo->device) { @@ -1573,7 +1889,7 @@ IOReturn IOGeneralMemoryDescriptor::wireVirtual(IODirection forDirection) } else { iopl.fFlags = 0; - if (mapper) + if (mapper) mapper->iovmInsert(mapBase, pageIndex, baseInfo, numPageInfo); } @@ -1615,6 +1931,8 @@ IOReturn IOGeneralMemoryDescriptor::wireVirtual(IODirection forDirection) } } + _highestPage = highestPage; + return kIOReturnSuccess; abortExit: @@ -1654,7 +1972,7 @@ IOReturn IOGeneralMemoryDescriptor::prepare(IODirection forDirection) IOOptionBits type = _flags & kIOMemoryTypeMask; if (!_wireCount - && (kIOMemoryTypeVirtual == type || kIOMemoryTypeUIO == type) ) { + && (kIOMemoryTypeVirtual == type || kIOMemoryTypeVirtual64 == type || kIOMemoryTypeUIO == type) ) { error = wireVirtual(forDirection); if (error) return error; @@ -1685,7 +2003,7 @@ IOReturn IOGeneralMemoryDescriptor::complete(IODirection /* forDirection */) if (!_wireCount) { IOOptionBits type = _flags & kIOMemoryTypeMask; - if (kIOMemoryTypePhysical == type) { + if ((kIOMemoryTypePhysical == type) || (kIOMemoryTypePhysical64 == type)) { /* kIOMemoryTypePhysical */ // DO NOTHING } @@ -1698,7 +2016,7 @@ IOReturn IOGeneralMemoryDescriptor::complete(IODirection /* forDirection */) dataP->fMapper->iovmFree(ioplList[0].fMappedBase, _pages); // Only complete iopls that we created which are for TypeVirtual - if (kIOMemoryTypeVirtual == type || kIOMemoryTypeUIO == type) { + if (kIOMemoryTypeVirtual == type || kIOMemoryTypeVirtual64 == type || kIOMemoryTypeUIO == type) { for (UInt ind = 0; ind < count; ind++) if (ioplList[ind].fIOPL) { upl_commit(ioplList[ind].fIOPL, 0, 0); @@ -1749,7 +2067,7 @@ IOReturn IOGeneralMemoryDescriptor::doMap( vm_size_t size = ptoa_32(_pages); if( _task) { -#ifndef i386 + memory_object_size_t actualSize = size; kr = mach_make_memory_entry_64(get_task_map(_task), 
&actualSize, range0Addr, @@ -1766,10 +2084,9 @@ IOReturn IOGeneralMemoryDescriptor::doMap( } if( KERN_SUCCESS != kr) -#endif /* !i386 */ sharedMem = MACH_PORT_NULL; - } else do { + } else do { // _task == 0, must be physical memory_object_t pager; unsigned int flags = 0; @@ -1844,11 +2161,9 @@ IOReturn IOGeneralMemoryDescriptor::doMap( } -#ifndef i386 if( 0 == sharedMem) kr = kIOReturnVMError; else -#endif kr = super::doMap( addressMap, atAddress, options, sourceOffset, length ); @@ -2255,19 +2570,6 @@ IOReturn IOMemoryDescriptor::handleFault( segLen - pageOffset); #endif - - - - -#ifdef i386 - /* i386 doesn't support faulting on device memory yet */ - if( addressMap && (kIOReturnSuccess == err)) - err = IOMapPages( addressMap, address, (IOPhysicalAddress) physAddr, segLen, options ); - assert( KERN_SUCCESS == err ); - if( err) - break; -#endif - if( pager) { if( reserved && reserved->pagerContig) { IOPhysicalLength allLen; @@ -2291,7 +2593,7 @@ IOReturn IOMemoryDescriptor::handleFault( if( err) break; } -#ifndef i386 + /* *** ALERT *** */ /* *** Temporary Workaround *** */ @@ -2319,7 +2621,7 @@ IOReturn IOMemoryDescriptor::handleFault( /* *** Temporary Workaround *** */ /* *** ALERT *** */ -#endif + sourceOffset += segLen - pageOffset; address += segLen; bytes -= segLen; @@ -2413,28 +2715,46 @@ IOReturn _IOMemoryMap::redirect( task_t safeTask, bool doRedirect ) } else { LOCK; - if( logical && addressMap - && (!safeTask || (get_task_map(safeTask) != addressMap)) - && (0 == (options & kIOMapStatic))) + + do { - IOUnmapPages( addressMap, logical, length ); - if(!doRedirect && safeTask - && ((memory->_flags & kIOMemoryTypeMask) == kIOMemoryTypePhysical)) - { - err = vm_deallocate( addressMap, logical, length ); - err = memory->doMap( addressMap, &logical, - (options & ~kIOMapAnywhere) /*| kIOMapReserve*/, - offset, length ); - } else - err = kIOReturnSuccess; + if (!logical) + break; + if (!addressMap) + break; + + if ((!safeTask || (get_task_map(safeTask) != 
addressMap)) + && (0 == (options & kIOMapStatic))) + { + IOUnmapPages( addressMap, logical, length ); + if(!doRedirect && safeTask + && (((memory->_flags & kIOMemoryTypeMask) == kIOMemoryTypePhysical) + || ((memory->_flags & kIOMemoryTypeMask) == kIOMemoryTypePhysical64))) + { + err = vm_deallocate( addressMap, logical, length ); + err = memory->doMap( addressMap, &logical, + (options & ~kIOMapAnywhere) /*| kIOMapReserve*/, + offset, length ); + } else + err = kIOReturnSuccess; #ifdef DEBUG - IOLog("IOMemoryMap::redirect(%d, %p) %x:%lx from %p\n", doRedirect, this, logical, length, addressMap); + IOLog("IOMemoryMap::redirect(%d, %p) %x:%lx from %p\n", doRedirect, this, logical, length, addressMap); #endif - } - UNLOCK; + } + else if (kIOMapWriteCombineCache == (options & kIOMapCacheMask)) + { + IOOptionBits newMode; + newMode = (options & ~kIOMapCacheMask) | (doRedirect ? kIOMapInhibitCache : kIOMapWriteCombineCache); + IOProtectCacheMode(addressMap, logical, length, newMode); + } + } + while (false); + + UNLOCK; } - if (((memory->_flags & kIOMemoryTypeMask) == kIOMemoryTypePhysical) + if ((((memory->_flags & kIOMemoryTypeMask) == kIOMemoryTypePhysical) + || ((memory->_flags & kIOMemoryTypeMask) == kIOMemoryTypePhysical64)) && safeTask && (doRedirect != (0 != (memory->_flags & kIOMemoryRedirected)))) memory->redirect(safeTask, doRedirect); @@ -2592,8 +2912,8 @@ _IOMemoryMap * _IOMemoryMap::copyCompatible( return( mapping ); } -IOPhysicalAddress _IOMemoryMap::getPhysicalSegment( IOByteCount _offset, - IOPhysicalLength * _length) +IOPhysicalAddress +_IOMemoryMap::getPhysicalSegment( IOByteCount _offset, IOPhysicalLength * _length) { IOPhysicalAddress address; @@ -2618,6 +2938,20 @@ void IOMemoryDescriptor::initialize( void ) IORegistryEntry::getRegistryRoot()->setProperty(kIOMaximumMappedIOByteCountKey, ptoa_64(gIOMaximumMappedIOPageCount), 64); + if (!gIOCopyMapper) + { + IOMapper * + mapper = new IOCopyMapper; + if (mapper) + { + if (mapper->init() && 
mapper->start(NULL)) + gIOCopyMapper = (IOCopyMapper *) mapper; + else + mapper->release(); + } + } + + gIOLastPage = IOGetLastPageNumber(); } void IOMemoryDescriptor::free( void ) @@ -2686,7 +3020,8 @@ IOReturn _IOMemoryMap::redirect(IOMemoryDescriptor * newBackingMemory, if (logical && addressMap) do { - if ((memory->_flags & kIOMemoryTypeMask) == kIOMemoryTypePhysical) + if (((memory->_flags & kIOMemoryTypeMask) == kIOMemoryTypePhysical) + || ((memory->_flags & kIOMemoryTypeMask) == kIOMemoryTypePhysical64)) { physMem = memory; physMem->retain(); @@ -2762,7 +3097,8 @@ IOMemoryMap * IOMemoryDescriptor::makeMapping( if (owner != this) continue; - if ((_flags & kIOMemoryTypeMask) == kIOMemoryTypePhysical) + if (((_flags & kIOMemoryTypeMask) == kIOMemoryTypePhysical) + || ((_flags & kIOMemoryTypeMask) == kIOMemoryTypePhysical64)) { phys = getPhysicalSegment(offset, &physLen); if (!phys || (physLen < length)) @@ -2945,8 +3281,71 @@ void IOSubMemoryDescriptor::free( void ) } -IOPhysicalAddress IOSubMemoryDescriptor::getPhysicalSegment( IOByteCount offset, - IOByteCount * length ) +IOReturn +IOSubMemoryDescriptor::dmaCommandOperation(DMACommandOps op, void *vData, UInt dataSize) const +{ + IOReturn rtn; + + if (kIOMDGetCharacteristics == op) { + + rtn = _parent->dmaCommandOperation(op, vData, dataSize); + if (kIOReturnSuccess == rtn) { + IOMDDMACharacteristics *data = (IOMDDMACharacteristics *) vData; + data->fLength = _length; + data->fSGCount = 0; // XXX gvdl: need to compute and pages + data->fPages = 0; + data->fPageAlign = 0; + } + + return rtn; + } + else if (kIOMDWalkSegments & op) { + if (dataSize < sizeof(IOMDDMAWalkSegmentArgs)) + return kIOReturnUnderrun; + + IOMDDMAWalkSegmentArgs *data = + reinterpret_cast(vData); + UInt offset = data->fOffset; + UInt remain = _length - offset; + if ((int) remain <= 0) + return (!remain)? 
kIOReturnOverrun : kIOReturnInternalError; + + data->fOffset = offset + _start; + rtn = _parent->dmaCommandOperation(op, vData, dataSize); + if (data->fLength > remain) + data->fLength = remain; + data->fOffset = offset; + + return rtn; + } + else + return kIOReturnBadArgument; +} + +addr64_t +IOSubMemoryDescriptor::getPhysicalSegment64(IOByteCount offset, IOByteCount * length) +{ + addr64_t address; + IOByteCount actualLength; + + assert(offset <= _length); + + if( length) + *length = 0; + + if( offset >= _length) + return( 0 ); + + address = _parent->getPhysicalSegment64( offset + _start, &actualLength ); + + if( address && length) + *length = min( _length - offset, actualLength ); + + return( address ); +} + +IOPhysicalAddress +IOSubMemoryDescriptor::getPhysicalSegment( IOByteCount offset, IOByteCount * length ) { IOPhysicalAddress address; IOByteCount actualLength; @@ -2980,8 +3379,8 @@ IOReturn IOSubMemoryDescriptor::doMap( return (_parent->doMap(addressMap, atAddress, options, sourceOffset + _start, length)); } -IOPhysicalAddress IOSubMemoryDescriptor::getSourceSegment( IOByteCount offset, - IOByteCount * length ) +IOPhysicalAddress +IOSubMemoryDescriptor::getSourceSegment( IOByteCount offset, IOByteCount * length ) { IOPhysicalAddress address; IOByteCount actualLength; @@ -3328,7 +3727,7 @@ OSMetaClassDefineReservedUsed(IOMemoryDescriptor, 1); OSMetaClassDefineReservedUsed(IOMemoryDescriptor, 2); OSMetaClassDefineReservedUsed(IOMemoryDescriptor, 3); OSMetaClassDefineReservedUsed(IOMemoryDescriptor, 4); -OSMetaClassDefineReservedUnused(IOMemoryDescriptor, 5); +OSMetaClassDefineReservedUsed(IOMemoryDescriptor, 5); OSMetaClassDefineReservedUnused(IOMemoryDescriptor, 6); OSMetaClassDefineReservedUnused(IOMemoryDescriptor, 7); OSMetaClassDefineReservedUnused(IOMemoryDescriptor, 8); @@ -3341,5 +3740,9 @@ OSMetaClassDefineReservedUnused(IOMemoryDescriptor, 14); OSMetaClassDefineReservedUnused(IOMemoryDescriptor, 15); /* ex-inline function implementation */ 
-IOPhysicalAddress IOMemoryDescriptor::getPhysicalAddress() +IOPhysicalAddress +IOMemoryDescriptor::getPhysicalAddress() { return( getPhysicalSegment( 0, 0 )); } + + + diff --git a/iokit/Kernel/IOMultiMemoryDescriptor.cpp b/iokit/Kernel/IOMultiMemoryDescriptor.cpp index ea706f33a..257a3aadb 100644 --- a/iokit/Kernel/IOMultiMemoryDescriptor.cpp +++ b/iokit/Kernel/IOMultiMemoryDescriptor.cpp @@ -270,9 +270,35 @@ IOReturn IOMultiMemoryDescriptor::complete(IODirection forDirection) // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -IOPhysicalAddress IOMultiMemoryDescriptor::getPhysicalSegment( - IOByteCount offset, - IOByteCount * length ) +addr64_t IOMultiMemoryDescriptor::getPhysicalSegment64( + IOByteCount offset, IOByteCount * length ) +{ + // + // This method returns the physical address of the byte at the given offset + // into the memory, and optionally the length of the physically contiguous + // segment from that offset. + // + + assert(offset <= _length); + + for ( unsigned index = 0; index < _descriptorsCount; index++ ) + { + if ( offset < _descriptors[index]->getLength() ) + { + return _descriptors[index]->getPhysicalSegment64(offset, length); + } + offset -= _descriptors[index]->getLength(); + } + + if ( length ) *length = 0; + + return 0; +} + +// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +IOPhysicalAddress IOMultiMemoryDescriptor::getPhysicalSegment( + IOByteCount offset, IOByteCount * length ) { // // This method returns the physical address of the byte at the given offset diff --git a/iokit/Kernel/IOPMPowerSource.cpp b/iokit/Kernel/IOPMPowerSource.cpp index c91b1ea3d..d71aa2da1 100644 --- a/iokit/Kernel/IOPMPowerSource.cpp +++ b/iokit/Kernel/IOPMPowerSource.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 1998-2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 1998-2005 Apple Computer, Inc. All rights reserved. 
* * @APPLE_LICENSE_HEADER_START@ * @@ -21,146 +21,373 @@ */ #include +#include +#include +#include -#define super OSObject +#define super IOService -OSDefineMetaClassAndStructors(IOPMPowerSource, OSObject) +OSDefineMetaClassAndStructors(IOPMPowerSource, IOService) -// ********************************************************************************** -// init +// ***************************************************************************** +// powerSource // -// ********************************************************************************** -bool IOPMPowerSource::init (unsigned short whichBatteryIndex) -{ - if (!super::init ()) - return false; +// Static initializer for IOPMPowerSource. Returns a new instance of the class +// which the caller must attach to the power plane. +// ***************************************************************************** - bBatteryIndex = whichBatteryIndex; - nextInList = 0; +IOPMPowerSource *IOPMPowerSource::powerSource(void) +{ + IOPMPowerSource *ps = new IOPMPowerSource; - return true; + if(ps) { + ps->init(); + return ps; + } + return NULL; } -// ********************************************************************************** -// capacityPercentRemaining +// ***************************************************************************** +// init // -// ********************************************************************************** -unsigned long IOPMPowerSource::capacityPercentRemaining (void) +// ***************************************************************************** +bool IOPMPowerSource::init (void) { - unsigned long percentage = 0; + if (!super::init()) { + return false; + } - if (bMaxCapacity > 0) - percentage = (bCurCapacity * 100) / bMaxCapacity; - - // always return a non-zero value unless the real capacity IS zero. 
- if (percentage == 0 && bCurCapacity > 0) - percentage = 1; + nextInList = NULL; + + properties = OSDictionary::withCapacity(10); + if(!properties) return false; + properties->setCapacityIncrement(1); - return percentage; + externalConnectedKey = OSSymbol::withCString(kIOPMPSExternalConnectedKey); + externalChargeCapableKey = OSSymbol::withCString(kIOPMPSExternalChargeCapableKey); + batteryInstalledKey = OSSymbol::withCString(kIOPMPSBatteryInstalledKey); + chargingKey = OSSymbol::withCString(kIOPMPSIsChargingKey); + warnLevelKey = OSSymbol::withCString(kIOPMPSAtWarnLevelKey); + criticalLevelKey = OSSymbol::withCString(kIOPMPSAtCriticalLevelKey); + currentCapacityKey = OSSymbol::withCString(kIOPMPSCurrentCapacityKey); + maxCapacityKey = OSSymbol::withCString(kIOPMPSMaxCapacityKey); + timeRemainingKey = OSSymbol::withCString(kIOPMPSTimeRemainingKey); + amperageKey = OSSymbol::withCString(kIOPMPSAmperageKey); + voltageKey = OSSymbol::withCString(kIOPMPSVoltageKey); + cycleCountKey = OSSymbol::withCString(kIOPMPSCycleCountKey); + adapterInfoKey = OSSymbol::withCString(kIOPMPSAdapterInfoKey); + locationKey = OSSymbol::withCString(kIOPMPSLocationKey); + errorConditionKey = OSSymbol::withCString(kIOPMPSErrorConditionKey); + manufacturerKey = OSSymbol::withCString(kIOPMPSManufacturerKey); + modelKey = OSSymbol::withCString(kIOPMPSModelKey); + serialKey = OSSymbol::withCString(kIOPMPSSerialKey); + batteryInfoKey = OSSymbol::withCString(kIOPMPSLegacyBatteryInfoKey); + + return true; } -// ********************************************************************************** -// atWarnLevel +// ***************************************************************************** +// free // -// ********************************************************************************** -bool IOPMPowerSource::atWarnLevel (void) +// ***************************************************************************** +void IOPMPowerSource::free(void) { - return bFlags & kBatteryAtWarn; + if(properties) 
properties->release(); + if(externalConnectedKey) externalConnectedKey->release(); + if(externalChargeCapableKey) externalChargeCapableKey->release(); + if(batteryInstalledKey) batteryInstalledKey->release(); + if(chargingKey) chargingKey->release(); + if(warnLevelKey) warnLevelKey->release(); + if(criticalLevelKey) criticalLevelKey->release(); + if(currentCapacityKey) currentCapacityKey->release(); + if(maxCapacityKey) maxCapacityKey->release(); + if(timeRemainingKey) timeRemainingKey->release(); + if(amperageKey) amperageKey->release(); + if(voltageKey) voltageKey->release(); + if(cycleCountKey) cycleCountKey->release(); + if(adapterInfoKey) adapterInfoKey->release(); + if(errorConditionKey) errorConditionKey->release(); + if(manufacturerKey) manufacturerKey->release(); + if(modelKey) modelKey->release(); + if(serialKey) serialKey->release(); + if(locationKey) locationKey->release(); + if(batteryInfoKey) batteryInfoKey->release(); } -// ********************************************************************************** -// acConnected +// ***************************************************************************** +// updateStatus // -// ********************************************************************************** -bool IOPMPowerSource::acConnected (void) +// Update power source state in IORegistry and message interested clients +// notifying them of our change. 
+// ***************************************************************************** +void IOPMPowerSource::updateStatus (void) { - return bFlags & kACInstalled; + OSCollectionIterator *iterator; + OSObject *iteratorKey; + OSObject *obj; + + iterator = OSCollectionIterator::withCollection(properties); + if(!iterator) return; + + while ((iteratorKey = iterator->getNextObject())) { + OSSymbol *key; + + key = OSDynamicCast(OSSymbol, iteratorKey); + if (!key) continue; + obj = properties->getObject(key); + if(!obj) continue; + setProperty(key, obj); + } + iterator->release(); + + // And up goes the flare + messageClients(kIOPMMessageBatteryStatusHasChanged); } -// ********************************************************************************** -// depleted -// -// ********************************************************************************** -bool IOPMPowerSource::depleted (void) -{ - return bFlags & kBatteryDepleted; + +/******************************************************************************* + * + * PROTECTED Accessors. All the setters! Yay! + * + ******************************************************************************/ + +void IOPMPowerSource::setExternalConnected(bool b) { + properties->setObject( + externalConnectedKey, + b ? kOSBooleanTrue:kOSBooleanFalse); } -// ********************************************************************************** -// isInstalled -// -// ********************************************************************************** -bool IOPMPowerSource::isInstalled (void) -{ - return bFlags & kBatteryInstalled; +void IOPMPowerSource::setExternalChargeCapable(bool b) { + properties->setObject( + externalChargeCapableKey, + b ? 
kOSBooleanTrue:kOSBooleanFalse); } -// ********************************************************************************** -// isCharging -// -// ********************************************************************************** -bool IOPMPowerSource::isCharging (void) -{ - return bFlags & kBatteryCharging; +void IOPMPowerSource::setBatteryInstalled(bool b) { + properties->setObject( + batteryInstalledKey, + b ? kOSBooleanTrue:kOSBooleanFalse); } -// ********************************************************************************** -// timeRemaining -// -// ********************************************************************************** -unsigned long IOPMPowerSource::timeRemaining (void) -{ - return bTimeRemaining; +void IOPMPowerSource::setIsCharging(bool b) { + properties->setObject( + chargingKey, + b ? kOSBooleanTrue:kOSBooleanFalse); } -// ********************************************************************************** -// maxCapacity -// -// ********************************************************************************** -unsigned long IOPMPowerSource::maxCapacity (void) -{ - return bMaxCapacity; +void IOPMPowerSource::setAtWarnLevel(bool b) { + properties->setObject( + warnLevelKey, + b ? kOSBooleanTrue:kOSBooleanFalse); } -// ********************************************************************************** -// curCapacity -// -// ********************************************************************************** -unsigned long IOPMPowerSource::curCapacity (void) -{ - return bCurCapacity; +void IOPMPowerSource::setAtCriticalLevel(bool b) { + properties->setObject( + criticalLevelKey, + b ? 
kOSBooleanTrue:kOSBooleanFalse); } -// ********************************************************************************** -// currentDrawn -// -// ********************************************************************************** -long IOPMPowerSource::currentDrawn (void) -{ - return bCurrent; + +void IOPMPowerSource::setCurrentCapacity(unsigned int val) { + OSNumber *n = OSNumber::withNumber(val, 32); + properties->setObject( + currentCapacityKey, + n); + n->release(); } -// ********************************************************************************** -// voltage -// -// ********************************************************************************** +void IOPMPowerSource::setMaxCapacity(unsigned int val) { + OSNumber *n = OSNumber::withNumber(val, 32); + properties->setObject( + maxCapacityKey, + n); + n->release(); +} -unsigned long IOPMPowerSource::voltage (void) -{ - return bVoltage; +void IOPMPowerSource::setTimeRemaining(int val) { + OSNumber *n = OSNumber::withNumber(val, 32); + properties->setObject( + timeRemainingKey, + n); + n->release(); } -// ********************************************************************************** -// updateStatus -// -// ********************************************************************************** +void IOPMPowerSource::setAmperage(int val) { + OSNumber *n = OSNumber::withNumber(val, 32); + properties->setObject( + amperageKey, + n); + n->release(); +} -void IOPMPowerSource::updateStatus (void) +void IOPMPowerSource::setVoltage(unsigned int val) { + OSNumber *n = OSNumber::withNumber(val, 32); + properties->setObject( + voltageKey, + n); + n->release(); +} + +void IOPMPowerSource::setCycleCount(unsigned int val) { + OSNumber *n = OSNumber::withNumber(val, 32); + properties->setObject( + cycleCountKey, + n); + n->release(); +} + +void IOPMPowerSource::setAdapterInfo(int val) { + OSNumber *n = OSNumber::withNumber(val, 32); + properties->setObject( + adapterInfoKey, + n); + n->release(); +} + +void 
IOPMPowerSource::setLocation(int val) { + OSNumber *n = OSNumber::withNumber(val, 32); + properties->setObject( + locationKey, + n); + n->release(); +} + +void IOPMPowerSource::setErrorCondition(OSSymbol *s) { + properties->setObject(errorConditionKey, s); +} + +void IOPMPowerSource::setManufacturer(OSSymbol *s) { + properties->setObject(manufacturerKey, s); +} + +void IOPMPowerSource::setModel(OSSymbol *s) { + properties->setObject(modelKey, s); +} + +void IOPMPowerSource::setSerial(OSSymbol *s) { + properties->setObject(serialKey, s); +} + +void IOPMPowerSource::setLegacyIOBatteryInfo(OSDictionary *d) { + properties->setObject(batteryInfoKey, d); +} + + + + +/******************************************************************************* + * + * PUBLIC Accessors. All the getters! Boo! + * + ******************************************************************************/ + +bool IOPMPowerSource::externalConnected(void) { + return (kOSBooleanTrue == properties->getObject(externalConnectedKey)); +} + +bool IOPMPowerSource::externalChargeCapable(void) { + return (kOSBooleanTrue == properties->getObject(externalChargeCapableKey)); +} + +bool IOPMPowerSource::batteryInstalled(void) { + return (kOSBooleanTrue == properties->getObject(batteryInstalledKey)); +} + +bool IOPMPowerSource::isCharging(void) { + return (kOSBooleanTrue == properties->getObject(chargingKey)); +} + +bool IOPMPowerSource::atWarnLevel(void) { + return (kOSBooleanTrue == properties->getObject(warnLevelKey)); +} + +bool IOPMPowerSource::atCriticalLevel(void) { + return (kOSBooleanTrue == properties->getObject(criticalLevelKey)); +} + +unsigned int IOPMPowerSource::currentCapacity(void) { + OSNumber *n; + n = OSDynamicCast(OSNumber, properties->getObject(currentCapacityKey)); + if(!n) return 0; + else return (unsigned int)n->unsigned32BitValue(); +} + +unsigned int IOPMPowerSource::maxCapacity(void) { + OSNumber *n; + n = OSDynamicCast(OSNumber, properties->getObject(maxCapacityKey)); + if(!n) return 0; 
+ else return (unsigned int)n->unsigned32BitValue(); +} + +unsigned int IOPMPowerSource::capacityPercentRemaining(void) { + unsigned int _currentCapacity = currentCapacity(); + unsigned int _maxCapacity = maxCapacity(); + if(0 == _maxCapacity) { + return 0; + } else { + return ((100*_currentCapacity) / _maxCapacity); + } +} +int IOPMPowerSource::timeRemaining(void) { + OSNumber *n; + n = OSDynamicCast(OSNumber, properties->getObject(timeRemainingKey)); + if(!n) return 0; + else return (int)n->unsigned32BitValue(); } +int IOPMPowerSource::amperage(void) { + OSNumber *n; + n = OSDynamicCast(OSNumber, properties->getObject(amperageKey)); + if(!n) return 0; + else return (int)n->unsigned32BitValue(); +} +unsigned int IOPMPowerSource::voltage(void) { + OSNumber *n; + n = OSDynamicCast(OSNumber, properties->getObject(voltageKey)); + if(!n) return 0; + else return (unsigned int)n->unsigned32BitValue(); +} +unsigned int IOPMPowerSource::cycleCount(void) { + OSNumber *n; + n = OSDynamicCast(OSNumber, properties->getObject(cycleCountKey)); + if(!n) return 0; + else return (unsigned int)n->unsigned32BitValue(); +} +int IOPMPowerSource::adapterInfo(void) { + OSNumber *n; + n = OSDynamicCast(OSNumber, properties->getObject(adapterInfoKey)); + if(!n) return 0; + else return (int)n->unsigned32BitValue(); +} +int IOPMPowerSource::location(void) { + OSNumber *n; + n = OSDynamicCast(OSNumber, properties->getObject(locationKey)); + if(!n) return 0; + else return (unsigned int)n->unsigned32BitValue(); +} + +OSSymbol *IOPMPowerSource::errorCondition(void) { + return OSDynamicCast(OSSymbol, properties->getObject(errorConditionKey)); +} + +OSSymbol *IOPMPowerSource::manufacturer(void) { + return OSDynamicCast(OSSymbol, properties->getObject(manufacturerKey)); +} + +OSSymbol *IOPMPowerSource::model(void) { + return OSDynamicCast(OSSymbol, properties->getObject(modelKey)); +} + +OSSymbol *IOPMPowerSource::serial(void) { + return OSDynamicCast(OSSymbol, properties->getObject(serialKey)); +} 
+ +OSDictionary *IOPMPowerSource::legacyIOBatteryInfo(void) { + return OSDynamicCast(OSDictionary, properties->getObject(batteryInfoKey)); +} diff --git a/iokit/Kernel/IOPMPowerSourceList.cpp b/iokit/Kernel/IOPMPowerSourceList.cpp index dc6c949fe..cace37fa4 100644 --- a/iokit/Kernel/IOPMPowerSourceList.cpp +++ b/iokit/Kernel/IOPMPowerSourceList.cpp @@ -26,54 +26,61 @@ #define super OSObject OSDefineMetaClassAndStructors(IOPMPowerSourceList,OSObject) -//********************************************************************************* +//****************************************************************************** // init // -//********************************************************************************* +//****************************************************************************** void IOPMPowerSourceList::initialize ( void ) { firstItem = NULL; length = 0; } -//********************************************************************************* +//****************************************************************************** // addToList // -//********************************************************************************* +//****************************************************************************** -IOReturn IOPMPowerSourceList::addToList ( IOPMPowerSource * newPowerSource ) +IOReturn IOPMPowerSourceList::addToList(IOPMPowerSource *newPowerSource) { IOPMPowerSource * nextPowerSource; - nextPowerSource = firstItem; // Is new object already in the list? - while ( nextPowerSource != NULL ) { - if ( nextPowerSource == newPowerSource ) { - return IOPMNoErr; // yes, just return + + // Is new object already in the list? 
+ nextPowerSource = firstItem; + while ( nextPowerSource != NULL ) + { + if ( nextPowerSource == newPowerSource ) + { + // yes, just return + return IOPMNoErr; } nextPowerSource = nextInList(nextPowerSource); } - newPowerSource->nextInList = firstItem; // add it to list + + // add it to list + newPowerSource->nextInList = firstItem; firstItem = newPowerSource; - length += 1; + length++; return IOPMNoErr; } -//********************************************************************************* +//****************************************************************************** // firstInList // -//********************************************************************************* +//****************************************************************************** IOPMPowerSource * IOPMPowerSourceList::firstInList ( void ) { return firstItem; } -//********************************************************************************* +//****************************************************************************** // nextInList // -//********************************************************************************* +//****************************************************************************** -IOPMPowerSource * IOPMPowerSourceList::nextInList ( IOPMPowerSource * currentItem ) +IOPMPowerSource * IOPMPowerSourceList::nextInList(IOPMPowerSource *currentItem) { if ( currentItem != NULL ) { return (currentItem->nextInList); @@ -81,54 +88,56 @@ IOPMPowerSource * IOPMPowerSourceList::nextInList ( IOPMPowerSource * currentIte return NULL; } -//********************************************************************************* +//****************************************************************************** // numberOfItems // -//********************************************************************************* +//****************************************************************************** unsigned long IOPMPowerSourceList::numberOfItems ( void ) { return length; } 
-//********************************************************************************* +//****************************************************************************** // removeFromList // // Find the item in the list, unlink it, and free it. -//********************************************************************************* +//****************************************************************************** IOReturn IOPMPowerSourceList::removeFromList ( IOPMPowerSource * theItem ) { IOPMPowerSource * item = firstItem; IOPMPowerSource * temp; - if ( item != NULL ) { - if ( item == theItem ) { - firstItem = item->nextInList; + if ( NULL == item) goto exit; + + if ( item == theItem ) { + firstItem = item->nextInList; + length--; + item->release(); + return IOPMNoErr; + } + while ( item->nextInList != NULL ) { + if ( item->nextInList == theItem ) { + temp = item->nextInList; + item->nextInList = temp->nextInList; length--; - item->release(); + temp->release(); return IOPMNoErr; } - while ( item->nextInList != NULL ) { - if ( item->nextInList == theItem ) { - temp = item->nextInList; - item->nextInList = temp->nextInList; - length--; - temp->release(); - return IOPMNoErr; - } - item = item->nextInList; - } + item = item->nextInList; } + +exit: return IOPMNoErr; } -//********************************************************************************* +//****************************************************************************** // free // // Free all items in the list, and then free the list itself -//********************************************************************************* +//****************************************************************************** void IOPMPowerSourceList::free (void ) { @@ -140,7 +149,7 @@ void IOPMPowerSourceList::free (void ) next->release(); next = firstItem; } -super::free(); + super::free(); } diff --git a/iokit/Kernel/IOPMPowerStateQueue.cpp b/iokit/Kernel/IOPMPowerStateQueue.cpp index 8f976a9db..54fc10387 100644 --- 
a/iokit/Kernel/IOPMPowerStateQueue.cpp +++ b/iokit/Kernel/IOPMPowerStateQueue.cpp @@ -21,10 +21,46 @@ */ #include "IOPMPowerStateQueue.h" +#include "IOKit/IOLocks.h" #undef super #define super IOEventSource OSDefineMetaClassAndStructors(IOPMPowerStateQueue, IOEventSource); +#ifdef __i386__ /* ppc does this right and doesn't need these routines */ +static +void * OSDequeueAtomic(void ** inList, SInt32 inOffset) +{ + void * oldListHead; + void * newListHead; + + do { + oldListHead = *inList; + if (oldListHead == NULL) { + break; + } + + newListHead = *(void **) (((char *) oldListHead) + inOffset); + } while (! OSCompareAndSwap((UInt32)oldListHead, + (UInt32)newListHead, (UInt32 *)inList)); + return oldListHead; +} + +static +void OSEnqueueAtomic(void ** inList, void * inNewLink, SInt32 inOffset) +{ + void * oldListHead; + void * newListHead = inNewLink; + void ** newLinkNextPtr = (void **) (((char *) inNewLink) + inOffset); + + do { + oldListHead = *inList; + *newLinkNextPtr = oldListHead; + } while (! 
OSCompareAndSwap((UInt32)oldListHead, (UInt32)newListHead, + (UInt32 *)inList)); +} +#endif /* __i386__ */ + + IOPMPowerStateQueue *IOPMPowerStateQueue::PMPowerStateQueue(OSObject *inOwner) { IOPMPowerStateQueue *me = new IOPMPowerStateQueue; @@ -44,7 +80,9 @@ bool IOPMPowerStateQueue::init(OSObject *owner, Action action) // Queue of powerstate changes changes = NULL; - +#ifdef __i386__ + if (!(tmpLock = IOLockAlloc())) panic("IOPMPowerStateQueue::init can't alloc lock"); +#endif return true; } @@ -61,8 +99,13 @@ bool IOPMPowerStateQueue::unIdleOccurred(IOService *inTarget, unsigned long inSt new_one->target = inTarget; // Change to queue +#ifdef __i386__ + IOLockLock(tmpLock); +#endif OSEnqueueAtomic((void **)&changes, (void *)new_one, 0); - +#ifdef __i386__ + IOLockUnlock(tmpLock); +#endif signalWorkAvailable(); return true; @@ -77,8 +120,14 @@ bool IOPMPowerStateQueue::checkForWork() UInt16 theAction; // Dequeue and process the state change request +#ifdef __i386__ + IOLockLock(tmpLock); +#endif if((theNode = (PowerChangeEntry *)OSDequeueAtomic((void **)&changes, 0))) { +#ifdef __i386__ + IOLockUnlock(tmpLock); +#endif theState = theNode->state; theTarget = theNode->target; theAction = theNode->actionType; @@ -87,11 +136,15 @@ bool IOPMPowerStateQueue::checkForWork() switch (theAction) { case kUnIdle: - theTarget->command_received(theState, 0, 0, 0); + theTarget->command_received((void *)theState, 0, 0, 0); break; } } - +#ifdef __i386__ + else { + IOLockUnlock(tmpLock); + } +#endif // Return true if there's more work to be done if(changes) return true; else return false; diff --git a/iokit/Kernel/IOPMPowerStateQueue.h b/iokit/Kernel/IOPMPowerStateQueue.h index 2f87ebbc8..4ec35004a 100644 --- a/iokit/Kernel/IOPMPowerStateQueue.h +++ b/iokit/Kernel/IOPMPowerStateQueue.h @@ -49,6 +49,9 @@ class IOPMPowerStateQueue : public IOEventSource }; void *changes; +#ifdef __i386__ + IOLock *tmpLock; +#endif protected: virtual bool checkForWork(void); diff --git 
a/iokit/Kernel/IOPMrootDomain.cpp b/iokit/Kernel/IOPMrootDomain.cpp index 7c95249a5..1695b2aa6 100644 --- a/iokit/Kernel/IOPMrootDomain.cpp +++ b/iokit/Kernel/IOPMrootDomain.cpp @@ -29,6 +29,7 @@ #include #include #include +#include #include "RootDomainUserClient.h" #include "IOKit/pwr_mgt/IOPowerConnection.h" #include "IOPMPowerStateQueue.h" @@ -39,7 +40,9 @@ #include #endif -extern "C" void kprintf(const char *, ...); +extern "C" { +IOReturn OSMetaClassSystemSleepOrWake( UInt32 ); +} extern const IORegistryPlane * gIOPowerPlane; @@ -47,6 +50,14 @@ IOReturn broadcast_aggressiveness ( OSObject *, void *, void *, void *, void * ) static void sleepTimerExpired(thread_call_param_t); static void wakeupClamshellTimerExpired ( thread_call_param_t us); +// "IOPMSetSleepSupported" callPlatformFunction name +static const OSSymbol *sleepSupportedPEFunction = NULL; + +#define kIOSleepSupportedKey "IOSleepSupported" + +#define kRD_AllPowerSources (kIOPMSupportedOnAC \ + | kIOPMSupportedOnBatt \ + | kIOPMSupportedOnUPS) #define number_of_power_states 5 #define OFF_STATE 0 @@ -60,25 +71,52 @@ static void wakeupClamshellTimerExpired ( thread_call_param_t us); #define SLEEP_POWER kIOPMAuxPowerOn #define DOZE_POWER kIOPMDoze +#define kLocalEvalClamshellCommand (1 << 15) + static IOPMPowerState ourPowerStates[number_of_power_states] = { - {1,0, 0, 0,0,0,0,0,0,0,0,0}, // state 0, off - {1,kIOPMRestartCapability, kIOPMRestart, RESTART_POWER,0,0,0,0,0,0,0,0}, // state 1, restart - {1,kIOPMSleepCapability, kIOPMSleep, SLEEP_POWER,0,0,0,0,0,0,0,0}, // state 2, sleep - {1,kIOPMDoze, kIOPMDoze, DOZE_POWER,0,0,0,0,0,0,0,0}, // state 3, doze - {1,kIOPMPowerOn, kIOPMPowerOn, ON_POWER,0,0,0,0,0,0,0,0}, // state 4, on + // state 0, off + {1,0, 0, 0,0,0,0,0,0,0,0,0}, + // state 1, restart + {1,kIOPMRestartCapability, kIOPMRestart, RESTART_POWER,0,0,0,0,0,0,0,0}, + // state 2, sleep + {1,kIOPMSleepCapability, kIOPMSleep, SLEEP_POWER,0,0,0,0,0,0,0,0}, + // state 3, doze + {1,kIOPMDoze, kIOPMDoze, 
DOZE_POWER,0,0,0,0,0,0,0,0}, + // state 4, on + {1,kIOPMPowerOn, kIOPMPowerOn, ON_POWER,0,0,0,0,0,0,0,0}, }; -// RESERVED IOPMrootDomain class variables -#define diskSyncCalloutEntry _reserved->diskSyncCalloutEntry -#define _settingController _reserved->_settingController -#define _batteryLocationNotifier _reserved->_batteryLocationNotifier -#define _displayWranglerNotifier _reserved->_displayWranglerNotifier - - static IOPMrootDomain * gRootDomain; static UInt32 gSleepOrShutdownPending = 0; +class PMSettingObject : public OSObject +{ + OSDeclareDefaultStructors(PMSettingObject) +private: + IOPMrootDomain *parent; + IOPMSettingControllerCallback func; + OSObject *target; + uintptr_t refcon; + uint32_t *publishedFeatureID; + int releaseAtCount; +public: + static PMSettingObject *pmSettingObject( + IOPMrootDomain *parent_arg, + IOPMSettingControllerCallback handler_arg, + OSObject *target_arg, + uintptr_t refcon_arg, + uint32_t supportedPowerSources, + const OSSymbol *settings[]); + + void setPMSetting(const OSSymbol *type, OSObject *obj); + + void taggedRelease(const void *tag, const int when) const; + void free(void); +}; + + + #define super IOService OSDefineMetaClassAndStructors(IOPMrootDomain,IOService) @@ -200,10 +238,28 @@ static void disk_sync_callout(thread_call_param_t p0, thread_call_param_t p1) // expert informs us we are the root. 
// ********************************************************************************** +#define kRootDomainSettingsCount 12 bool IOPMrootDomain::start ( IOService * nub ) { - OSDictionary *tmpDict; + OSIterator *psIterator; + OSDictionary *tmpDict; + const OSSymbol *settingsArr[kRootDomainSettingsCount] = + { + OSSymbol::withCString(kIOPMSettingSleepOnPowerButtonKey), + OSSymbol::withCString(kIOPMSettingAutoWakeSecondsKey), + OSSymbol::withCString(kIOPMSettingAutoPowerSecondsKey), + OSSymbol::withCString(kIOPMSettingAutoWakeCalendarKey), + OSSymbol::withCString(kIOPMSettingAutoPowerCalendarKey), + OSSymbol::withCString(kIOPMSettingDebugWakeRelativeKey), + OSSymbol::withCString(kIOPMSettingDebugPowerRelativeKey), + OSSymbol::withCString(kIOPMSettingWakeOnRingKey), + OSSymbol::withCString(kIOPMSettingRestartOnPowerLossKey), + OSSymbol::withCString(kIOPMSettingWakeOnClamshellKey), + OSSymbol::withCString(kIOPMSettingWakeOnACChangeKey), + OSSymbol::withCString(kIOPMSettingTimeZoneOffsetKey) + }; + pmPowerStateQueue = 0; @@ -215,33 +271,55 @@ bool IOPMrootDomain::start ( IOService * nub ) gRootDomain = this; PMinit(); - setProperty("IOSleepSupported",""); + + sleepSupportedPEFunction = OSSymbol::withCString("IOPMSetSleepSupported"); + canSleep = true; + setProperty(kIOSleepSupportedKey,true); allowSleep = true; sleepIsSupported = true; systemBooting = true; - ignoringClamshell = true; sleepSlider = 0; idleSleepPending = false; - canSleep = true; wrangler = NULL; sleepASAP = false; - _settingController = NULL; + clamshellIsClosed = false; + clamshellExists = false; + ignoringClamshell = true; ignoringClamshellDuringWakeup = false; + acAdaptorConnect = true; tmpDict = OSDictionary::withCapacity(1); setProperty(kRootDomainSupportedFeatures, tmpDict); tmpDict->release(); + settingsCallbacks = OSDictionary::withCapacity(1); + + // Create a list of the valid PM settings that we'll relay to + // interested clients in setProperties() => setPMSetting() + allowedPMSettings = 
OSArray::withObjects( + (const OSObject **)settingsArr, + kRootDomainSettingsCount, + 0); + + fPMSettingsDict = OSDictionary::withCapacity(5); + pm_vars->PMworkloop = IOWorkLoop::workLoop(); pmPowerStateQueue = IOPMPowerStateQueue::PMPowerStateQueue(this); pm_vars->PMworkloop->addEventSource(pmPowerStateQueue); featuresDictLock = IOLockAlloc(); + settingsCtrlLock = IORecursiveLockAlloc(); - extraSleepTimer = thread_call_allocate((thread_call_func_t)sleepTimerExpired, (thread_call_param_t) this); - clamshellWakeupIgnore = thread_call_allocate((thread_call_func_t)wakeupClamshellTimerExpired, (thread_call_param_t) this); - diskSyncCalloutEntry = thread_call_allocate(&disk_sync_callout, (thread_call_param_t) this); + extraSleepTimer = thread_call_allocate( + (thread_call_func_t)sleepTimerExpired, + (thread_call_param_t) this); + clamshellWakeupIgnore = thread_call_allocate( + (thread_call_func_t)wakeupClamshellTimerExpired, + (thread_call_param_t) this); + diskSyncCalloutEntry = thread_call_allocate( + &disk_sync_callout, + (thread_call_param_t) this); // create our parent patriarch = new IORootParent; @@ -262,28 +340,32 @@ bool IOPMrootDomain::start ( IOService * nub ) registerPrioritySleepWakeInterest( &sysPowerDownHandler, this, 0); // Register for a notification when IODisplayWrangler is published - _displayWranglerNotifier = addNotification( gIOPublishNotification, - serviceMatching("IODisplayWrangler"), - &displayWranglerPublished, this, 0); + _displayWranglerNotifier = addNotification( + gIOPublishNotification, serviceMatching("IODisplayWrangler"), + &displayWranglerPublished, this, 0); - _batteryLocationNotifier = addNotification( gIOPublishNotification, - resourceMatching("battery"), - &batteryLocationPublished, this, this); + // Battery location published - ApplePMU support only + _batteryPublishNotifier = addNotification( + gIOPublishNotification, serviceMatching("IOPMPowerSource"), + &batteryPublished, this, this); + const OSSymbol *ucClassName = 
OSSymbol::withCStringNoCopy("RootDomainUserClient"); setProperty(gIOUserClientClassKey, (OSObject *) ucClassName); ucClassName->release(); - IORegistryEntry *temp_entry = NULL; - if( (temp_entry = IORegistryEntry::fromPath("mac-io/battery", gIODTPlane)) || - (temp_entry = IORegistryEntry::fromPath("mac-io/via-pmu/battery", gIODTPlane))) + // IOBacklightDisplay can take a long time to load at boot, or it may + // not load at all if you're booting with clamshell closed. We publish + // 'DisplayDims' here redundantly to get it published early and at all. + psIterator = getMatchingServices( serviceMatching("IOPMPowerSource") ); + if( psIterator && psIterator->getNextObject() ) { - // If this machine has a battery, publish the fact that the backlight - // supports dimming. - // Notice similar call in IOPMrootDomain::batteryLocationPublished() to - // detect batteries on SMU machines. + // There's at least one battery on the system, so we publish + // 'DisplayDims' support for the LCD. publishFeature("DisplayDims"); - temp_entry->release(); + } + if(psIterator) { + psIterator->release(); } IOHibernateSystemInit(this); @@ -293,17 +375,6 @@ bool IOPMrootDomain::start ( IOService * nub ) return true; } -IOReturn IOPMrootDomain::setPMSetting(int type, OSNumber *n) -{ - if(_settingController && _settingController->func) { - int seconds; - seconds = n->unsigned32BitValue(); - return (*(_settingController->func))(type, seconds, _settingController->refcon); - } else { - return kIOReturnNoDevice; - } -} - // ********************************************************************************** // setProperties // @@ -312,25 +383,27 @@ IOReturn IOPMrootDomain::setPMSetting(int type, OSNumber *n) // ********************************************************************************** IOReturn IOPMrootDomain::setProperties ( OSObject *props_obj) { - IOReturn return_value = kIOReturnSuccess; - OSDictionary *dict = OSDynamicCast(OSDictionary, props_obj); - OSBoolean *b; - OSNumber *n; - 
OSString *str; - const OSSymbol *boot_complete_string = OSSymbol::withCString("System Boot Complete"); - const OSSymbol *power_button_string = OSSymbol::withCString("DisablePowerButtonSleep"); - const OSSymbol *stall_halt_string = OSSymbol::withCString("StallSystemAtHalt"); - const OSSymbol *auto_wake_string = OSSymbol::withCString("wake"); - const OSSymbol *auto_power_string = OSSymbol::withCString("poweron"); - const OSSymbol *wakeonring_string = OSSymbol::withCString("WakeOnRing"); - const OSSymbol *fileserver_string = OSSymbol::withCString("AutoRestartOnPowerLoss"); - const OSSymbol *wakeonlid_string = OSSymbol::withCString("WakeOnLid"); - const OSSymbol *wakeonac_string = OSSymbol::withCString("WakeOnACChange"); - const OSSymbol *hibernatemode_string = OSSymbol::withCString(kIOHibernateModeKey); - const OSSymbol *hibernatefile_string = OSSymbol::withCString(kIOHibernateFileKey); - const OSSymbol *hibernatefreeratio_string = OSSymbol::withCString(kIOHibernateFreeRatioKey); - const OSSymbol *hibernatefreetime_string = OSSymbol::withCString(kIOHibernateFreeTimeKey); - const OSSymbol *timezone_string = OSSymbol::withCString("TimeZoneOffsetSeconds"); + IOReturn return_value = kIOReturnSuccess; + OSDictionary *dict = OSDynamicCast(OSDictionary, props_obj); + OSBoolean *b; + OSNumber *n; + OSString *str; + OSSymbol *type; + OSObject *obj; + unsigned int i; + + const OSSymbol *boot_complete_string = + OSSymbol::withCString("System Boot Complete"); + const OSSymbol *stall_halt_string = + OSSymbol::withCString("StallSystemAtHalt"); + const OSSymbol *hibernatemode_string = + OSSymbol::withCString(kIOHibernateModeKey); + const OSSymbol *hibernatefile_string = + OSSymbol::withCString(kIOHibernateFileKey); + const OSSymbol *hibernatefreeratio_string = + OSSymbol::withCString(kIOHibernateFreeRatioKey); + const OSSymbol *hibernatefreetime_string = + OSSymbol::withCString(kIOHibernateFreeTimeKey); if(!dict) { @@ -344,14 +417,15 @@ IOReturn IOPMrootDomain::setProperties ( 
OSObject *props_obj) { systemBooting = false; adjustPowerState(); + + // If lid is closed, re-send lid closed notification + // now that booting is complete. + if( clamshellIsClosed ) + { + this->receivePowerNotification(kLocalEvalClamshellCommand); + } } - if( power_button_string - && (b = OSDynamicCast(OSBoolean, dict->getObject(power_button_string))) ) - { - setProperty(power_button_string, b); - } - if( stall_halt_string && (b = OSDynamicCast(OSBoolean, dict->getObject(stall_halt_string))) ) { @@ -359,94 +433,43 @@ IOReturn IOPMrootDomain::setProperties ( OSObject *props_obj) } if ( hibernatemode_string - && (n = OSDynamicCast(OSNumber, dict->getObject(hibernatemode_string)))) + && (n = OSDynamicCast(OSNumber, dict->getObject(hibernatemode_string)))) { - setProperty(hibernatemode_string, n); + setProperty(hibernatemode_string, n); } if ( hibernatefreeratio_string - && (n = OSDynamicCast(OSNumber, dict->getObject(hibernatefreeratio_string)))) + && (n = OSDynamicCast(OSNumber, dict->getObject(hibernatefreeratio_string)))) { - setProperty(hibernatefreeratio_string, n); + setProperty(hibernatefreeratio_string, n); } if ( hibernatefreetime_string - && (n = OSDynamicCast(OSNumber, dict->getObject(hibernatefreetime_string)))) + && (n = OSDynamicCast(OSNumber, dict->getObject(hibernatefreetime_string)))) { - setProperty(hibernatefreetime_string, n); + setProperty(hibernatefreetime_string, n); } if ( hibernatefile_string - && (str = OSDynamicCast(OSString, dict->getObject(hibernatefile_string)))) - { - setProperty(hibernatefile_string, str); - } - - // Relay AutoWake setting to its controller - if( auto_wake_string - && (n = OSDynamicCast(OSNumber, dict->getObject(auto_wake_string))) ) + && (str = OSDynamicCast(OSString, dict->getObject(hibernatefile_string)))) { - return_value = setPMSetting(kIOPMAutoWakeSetting, n); - if(kIOReturnSuccess != return_value) goto exit; - } - - // Relay AutoPower setting to its controller - if( auto_power_string - && (n = 
OSDynamicCast(OSNumber, dict->getObject(auto_power_string))) ) - { - return_value = setPMSetting(kIOPMAutoPowerOnSetting, n); - if(kIOReturnSuccess != return_value) goto exit; + setProperty(hibernatefile_string, str); } - // Relay WakeOnRing setting to its controller - if( wakeonring_string - && (n = OSDynamicCast(OSNumber, dict->getObject(wakeonring_string))) ) - { - return_value = setPMSetting(kIOPMWakeOnRingSetting, n); - if(kIOReturnSuccess != return_value) goto exit; - } - - // Relay FileServer setting to its controller - if( fileserver_string - && (n = OSDynamicCast(OSNumber, dict->getObject(fileserver_string))) ) - { - return_value = setPMSetting(kIOPMAutoRestartOnPowerLossSetting, n); - if(kIOReturnSuccess != return_value) goto exit; - } + // Relay our allowed PM settings onto our registered PM clients + for(i = 0; i < allowedPMSettings->getCount(); i++) { - // Relay WakeOnLid setting to its controller - if( wakeonlid_string - && (n = OSDynamicCast(OSNumber, dict->getObject(wakeonlid_string))) ) - { - return_value = setPMSetting(kIOPMWakeOnLidSetting, n); - if(kIOReturnSuccess != return_value) goto exit; - } - - // Relay WakeOnACChange setting to its controller - if( wakeonac_string - && (n = OSDynamicCast(OSNumber, dict->getObject(wakeonac_string))) ) - { - return_value = setPMSetting(kIOPMWakeOnACChangeSetting, n); - if(kIOReturnSuccess != return_value) goto exit; - } + type = (OSSymbol *)allowedPMSettings->getObject(i); + if(!type) continue; - // Relay timezone offset in seconds to SMU - if( timezone_string - && (n = OSDynamicCast(OSNumber, dict->getObject(timezone_string))) ) - { - return_value = setPMSetting(kIOPMTimeZoneSetting, n); + obj = dict->getObject(type); + if(!obj) continue; + + return_value = setPMSetting(type, obj); + if(kIOReturnSuccess != return_value) goto exit; } - exit: if(boot_complete_string) boot_complete_string->release(); - if(power_button_string) power_button_string->release(); if(stall_halt_string) stall_halt_string->release(); 
- if(auto_wake_string) auto_wake_string->release(); - if(auto_power_string) auto_power_string->release(); - if(wakeonring_string) wakeonring_string->release(); - if(fileserver_string) fileserver_string->release(); - if(wakeonlid_string) wakeonlid_string->release(); - if(wakeonac_string) wakeonac_string->release(); - if(timezone_string) timezone_string->release(); return return_value; } @@ -561,13 +584,12 @@ void IOPMrootDomain::handleSleepTimerExpiration ( void ) void IOPMrootDomain::stopIgnoringClamshellEventsDuringWakeup(void) { - OSObject * state; - // Allow clamshell-induced sleep now ignoringClamshellDuringWakeup = false; - if ((state = getProperty(kAppleClamshellStateKey))) - publishResource(kAppleClamshellStateKey, state); + // Re-send clamshell event, in case it causes a sleep + if(clamshellIsClosed) + this->receivePowerNotification( kLocalEvalClamshellCommand ); } //********************************************************************************* @@ -611,7 +633,6 @@ IOReturn IOPMrootDomain::setAggressiveness ( unsigned long type, unsigned long n // ********************************************************************************** IOReturn IOPMrootDomain::sleepSystem ( void ) { - //kprintf("sleep demand received\n"); if ( !systemBooting && allowSleep && sleepIsSupported ) { patriarch->sleepSystem(); @@ -682,7 +703,7 @@ void IOPMrootDomain::powerChangeDone ( unsigned long previousState ) // code will resume execution here. // Now we're waking... 
- IOHibernateSystemWake(); + IOHibernateSystemWake(); // stay awake for at least 30 seconds clock_interval_to_deadline(30, kSecondScale, &deadline); @@ -716,7 +737,9 @@ void IOPMrootDomain::powerChangeDone ( unsigned long previousState ) if(getProperty(kIOREMSleepEnabledKey)) { // clamshellWakeupIgnore callout clears ignoreClamshellDuringWakeup bit clock_interval_to_deadline(5, kSecondScale, &deadline); - if(clamshellWakeupIgnore) thread_call_enter_delayed(clamshellWakeupIgnore, deadline); + if(clamshellWakeupIgnore) { + thread_call_enter_delayed(clamshellWakeupIgnore, deadline); + } } else ignoringClamshellDuringWakeup = false; // Find out what woke us @@ -781,8 +804,11 @@ void IOPMrootDomain::wakeFromDoze( void ) { if ( pm_vars->myCurrentState == DOZE_STATE ) { - // reset this till next attempt - canSleep = true; + // Reset sleep support till next sleep attempt. + // A machine's support of sleep vs. doze can change over the course of + // a running system, so we recalculate it before every sleep. 
+ setSleepSupported(0); + powerOverrideOffPriv(); // early wake notification @@ -794,43 +820,263 @@ void IOPMrootDomain::wakeFromDoze( void ) } -// ********************************************************************************** +// ***************************************************************************** // publishFeature // // Adds a new feature to the supported features dictionary // // -// ********************************************************************************** +// ***************************************************************************** void IOPMrootDomain::publishFeature( const char * feature ) { + publishFeature(feature, kIOPMSupportedOnAC + | kIOPMSupportedOnBatt + | kIOPMSupportedOnUPS, + NULL); + return; +} + + +// ***************************************************************************** +// publishFeature (with supported power source specified) +// +// Adds a new feature to the supported features dictionary +// +// +// ***************************************************************************** +void IOPMrootDomain::publishFeature( + const char *feature, + uint32_t supportedWhere, + uint32_t *uniqueFeatureID) +{ + static uint16_t next_feature_id = 500; + + OSNumber *new_feature_data = NULL; + OSNumber *existing_feature = NULL; + OSArray *existing_feature_arr = NULL; + OSObject *osObj = NULL; + uint32_t feature_value = 0; + + supportedWhere &= kRD_AllPowerSources; // mask off any craziness! + + if(!supportedWhere) { + // Feature isn't supported anywhere! + return; + } + + if(next_feature_id > 5000) { + // Far, far too many features! 
+ return; + } + if(featuresDictLock) IOLockLock(featuresDictLock); + OSDictionary *features = (OSDictionary *) getProperty(kRootDomainSupportedFeatures); - if ( features && OSDynamicCast(OSDictionary, features)) + // Create new features dict if necessary + if ( features && OSDynamicCast(OSDictionary, features)) { features = OSDictionary::withDictionary(features); - else + } else { features = OSDictionary::withCapacity(1); + } + + // Create OSNumber to track new feature + + next_feature_id += 1; + if( uniqueFeatureID ) { + // We don't really mind if the calling kext didn't give us a place + // to stash their unique id. Many kexts don't plan to unload, and thus + // have no need to remove themselves later. + *uniqueFeatureID = next_feature_id; + } + + feature_value = supportedWhere + (next_feature_id << 16); + new_feature_data = OSNumber::withNumber( + (unsigned long long)feature_value, 32); + + // Does features object already exist? + if( (osObj = features->getObject(feature)) ) + { + if(( existing_feature = OSDynamicCast(OSNumber, osObj) )) + { + // We need to create an OSArray to hold the now 2 elements. + existing_feature_arr = OSArray::withObjects( + (const OSObject **)&existing_feature, 1, 2); + existing_feature_arr->setObject(new_feature_data); + features->setObject(feature, existing_feature_arr); + } else if(( existing_feature_arr = OSDynamicCast(OSArray, osObj) )) + { + // Add object to existing array + existing_feature_arr->setObject(new_feature_data); + } + } else { + // The easy case: no previously existing features listed. We simply + // set the OSNumber at key 'feature' and we're on our way. 
+ features->setObject(feature, new_feature_data); + } + + new_feature_data->release(); - features->setObject(feature, kOSBooleanTrue); setProperty(kRootDomainSupportedFeatures, features); + features->release(); - if(featuresDictLock) IOLockUnlock(featuresDictLock); + + // Notify EnergySaver and all those in user space so they might + // re-populate their feature specific UI + messageClients(kIOPMMessageFeatureChange, this); + + if(featuresDictLock) IOLockUnlock(featuresDictLock); +} + +// ***************************************************************************** +// removePublishedFeature +// +// Removes previously published feature +// +// +// ***************************************************************************** +IOReturn IOPMrootDomain::removePublishedFeature( uint32_t removeFeatureID ) +{ + IOReturn ret = kIOReturnError; + uint32_t feature_value = 0; + uint16_t feature_id = 0; + bool madeAChange = false; + + OSSymbol *dictKey = NULL; + OSCollectionIterator *dictIterator = NULL; + OSArray *arrayMember = NULL; + OSNumber *numberMember = NULL; + OSObject *osObj = NULL; + OSNumber *osNum = NULL; + + if(featuresDictLock) IOLockLock(featuresDictLock); + + OSDictionary *features = + (OSDictionary *) getProperty(kRootDomainSupportedFeatures); + + if ( features && OSDynamicCast(OSDictionary, features) ) + { + // Any modifications to the dictionary are made to the copy to prevent + // races & crashes with userland clients. Dictionary updated + // automically later. + features = OSDictionary::withDictionary(features); + } else { + features = NULL; + ret = kIOReturnNotFound; + goto exit; + } + + // We iterate 'features' dictionary looking for an entry tagged + // with 'removeFeatureID'. If found, we remove it from our tracking + // structures and notify the OS via a general interest message. 
+ + dictIterator = OSCollectionIterator::withCollection(features); + if(!dictIterator) { + goto exit; + } + + while( (dictKey = OSDynamicCast(OSSymbol, dictIterator->getNextObject())) ) + { + osObj = features->getObject(dictKey); + + // Each Feature is either tracked by an OSNumber + if( osObj && (numberMember = OSDynamicCast(OSNumber, osObj)) ) + { + feature_value = numberMember->unsigned32BitValue(); + feature_id = (uint16_t)(feature_value >> 16); + + if( feature_id == (uint16_t)removeFeatureID ) + { + // Remove this node + features->removeObject(dictKey); + madeAChange = true; + break; + } + + // Or tracked by an OSArray of OSNumbers + } else if( osObj && (arrayMember = OSDynamicCast(OSArray, osObj)) ) + { + unsigned int arrayCount = arrayMember->getCount(); + + for(unsigned int i=0; igetObject(i)); + if(!osNum) { + continue; + } + + feature_value = osNum->unsigned32BitValue(); + feature_id = (uint16_t)(feature_value >> 16); + + if( feature_id == (uint16_t)removeFeatureID ) + { + // Remove this node + if( 1 == arrayCount ) { + // If the array only contains one element, remove + // the whole thing. + features->removeObject(dictKey); + } else { + // Otherwise just remove the element in question. + arrayMember->removeObject(i); + } + + madeAChange = true; + break; + } + } + } + } + + + dictIterator->release(); + + if( madeAChange ) + { + ret = kIOReturnSuccess; + + setProperty(kRootDomainSupportedFeatures, features); + + // Notify EnergySaver and all those in user space so they might + // re-populate their feature specific UI + messageClients(kIOPMMessageFeatureChange, this); + } else { + ret = kIOReturnNotFound; + } + +exit: + if(features) features->release(); + if(featuresDictLock) IOLockUnlock(featuresDictLock); + return ret; } +// ********************************************************************************** +// unIdleDevice +// +// Enqueues unidle event to be performed later in a serialized context. 
+// +// ********************************************************************************** void IOPMrootDomain::unIdleDevice( IOService *theDevice, unsigned long theState ) { if(pmPowerStateQueue) pmPowerStateQueue->unIdleOccurred(theDevice, theState); } +// ********************************************************************************** +// announcePowerSourceChange +// +// Notifies "interested parties" that the batteries have changed state +// +// ********************************************************************************** void IOPMrootDomain::announcePowerSourceChange( void ) { - IORegistryEntry *_batteryRegEntry = (IORegistryEntry *) getProperty("BatteryEntry"); + IORegistryEntry *_batteryRegEntry = (IORegistryEntry *) getProperty("BatteryEntry"); + + // (if possible) re-publish power source state under IOPMrootDomain; + // only do so if the battery controller publishes an IOResource + // defining battery location. Called from ApplePMU battery driver. - // (if possible) re-publish power source state under IOPMrootDomain - // (only done if the battery controller publishes an IOResource defining battery location) if(_batteryRegEntry) { OSArray *batt_info; @@ -839,57 +1085,234 @@ void IOPMrootDomain::announcePowerSourceChange( void ) setProperty(kIOBatteryInfoKey, batt_info); } - messageClients(kIOPMMessageBatteryStatusHasChanged); } -IOReturn IOPMrootDomain::registerPMSettingController - (IOPMSettingControllerCallback func, void *info) + +// ***************************************************************************** +// setPMSetting (private) +// +// Internal helper to relay PM settings changes from user space to individual +// drivers. Should be called only by IOPMrootDomain::setProperties. 
+// +// ***************************************************************************** +IOReturn IOPMrootDomain::setPMSetting( + const OSSymbol *type, + OSObject *obj) { - if(_settingController) return kIOReturnExclusiveAccess; - - _settingController = (PMSettingCtrl *)IOMalloc(sizeof(PMSettingCtrl)); - if(!_settingController) return kIOReturnNoMemory; + OSArray *arr = NULL; + PMSettingObject *p_obj = NULL; + int count; + int i; + + if(NULL == type) return kIOReturnBadArgument; + + IORecursiveLockLock(settingsCtrlLock); - _settingController->func = func; - _settingController->refcon = info; + fPMSettingsDict->setObject(type, obj); + + arr = (OSArray *)settingsCallbacks->getObject(type); + if(NULL == arr) goto exit; + count = arr->getCount(); + for(i=0; igetObject(i)); + if(p_obj) p_obj->setPMSetting(type, obj); + } + +exit: + IORecursiveLockUnlock(settingsCtrlLock); return kIOReturnSuccess; } +// ***************************************************************************** +// copyPMSetting (public) +// +// Allows kexts to safely read setting values, without being subscribed to +// notifications. +// +// ***************************************************************************** +OSObject * IOPMrootDomain::copyPMSetting( + OSSymbol *whichSetting) +{ + OSObject *obj = NULL; + + if(!whichSetting) return NULL; -//********************************************************************************* -// receivePowerNotification + IORecursiveLockLock(settingsCtrlLock); + obj = fPMSettingsDict->getObject(whichSetting); + if(obj) { + obj->retain(); + } + IORecursiveLockUnlock(settingsCtrlLock); + + return obj; +} + +// ***************************************************************************** +// registerPMSettingController (public) // -// The power controller is notifying us of a hardware-related power management -// event that we must handle. This is a result of an 'environment' interrupt from -// the power mgt micro. 
-//********************************************************************************* +// direct wrapper to registerPMSettingController with uint32_t power source arg +// ***************************************************************************** +IOReturn IOPMrootDomain::registerPMSettingController( + const OSSymbol * settings[], + IOPMSettingControllerCallback func, + OSObject *target, + uintptr_t refcon, + OSObject **handle) +{ + return registerPMSettingController( + settings, + (kIOPMSupportedOnAC | kIOPMSupportedOnBatt | kIOPMSupportedOnUPS), + func, target, refcon, handle); +} -IOReturn IOPMrootDomain::receivePowerNotification (UInt32 msg) +// ***************************************************************************** +// registerPMSettingController (public) +// +// Kexts may register for notifications when a particular setting is changed. +// A list of settings is available in IOPM.h. +// Arguments: +// * settings - An OSArray containing OSSymbols. Caller should populate this +// array with a list of settings caller wants notifications from. +// * func - A C function callback of the type IOPMSettingControllerCallback +// * target - caller may provide an OSObject *, which PM will pass as an +// target to calls to "func" +// * refcon - caller may provide an void *, which PM will pass as an +// argument to calls to "func" +// * handle - This is a return argument. We will populate this pointer upon +// call success. 
Hold onto this and pass this argument to +// IOPMrootDomain::deRegisterPMSettingCallback when unloading your kext +// Returns: +// kIOReturnSuccess on success +// ***************************************************************************** +IOReturn IOPMrootDomain::registerPMSettingController( + const OSSymbol * settings[], + uint32_t supportedPowerSources, + IOPMSettingControllerCallback func, + OSObject *target, + uintptr_t refcon, + OSObject **handle) { - if (msg & kIOPMOverTemp) + PMSettingObject *pmso = NULL; + OSArray *list = NULL; + IOReturn ret = kIOReturnSuccess; + int i; + + if( NULL == settings || + NULL == func || + NULL == handle) { - IOLog("Power Management received emergency overtemp signal. Going to sleep."); - (void) sleepSystem (); + return kIOReturnBadArgument; } - if (msg & kIOPMSetDesktopMode) + + + pmso = PMSettingObject::pmSettingObject( + (IOPMrootDomain *)this, func, target, + refcon, supportedPowerSources, settings); + + if(!pmso) { + ret = kIOReturnInternalError; + goto bail_no_unlock; + } + + IORecursiveLockLock(settingsCtrlLock); + for(i=0; settings[i]; i++) { - desktopMode = (0 != (msg & kIOPMSetValue)); - msg &= ~(kIOPMSetDesktopMode | kIOPMSetValue); + list = (OSArray *)settingsCallbacks->getObject(settings[i]); + if(!list) { + // New array of callbacks for this setting + list = OSArray::withCapacity(1); + settingsCallbacks->setObject(settings[i], list); + list->release(); + } + + // Add caller to the callback list + list->setObject(pmso); } - if (msg & kIOPMSetACAdaptorConnected) + + ret = kIOReturnSuccess; + + // Track this instance by its OSData ptr from now on + *handle = pmso; + + IORecursiveLockUnlock(settingsCtrlLock); + +bail_no_unlock: + if(kIOReturnSuccess != ret) { - acAdaptorConnect = (0 != (msg & kIOPMSetValue)); - msg &= ~(kIOPMSetACAdaptorConnected | kIOPMSetValue); + // Error return case + if(pmso) pmso->release(); + if(handle) *handle = NULL; } - if (msg & kIOPMEnableClamshell) + return ret; +} + 
+//****************************************************************************** +// sleepOnClamshellClosed +// +// contains the logic to determine if the system should sleep when the clamshell +// is closed. +//****************************************************************************** + +bool IOPMrootDomain::shouldSleepOnClamshellClosed ( void ) +{ + return ( !ignoringClamshell + && !ignoringClamshellDuringWakeup + && !(desktopMode && acAdaptorConnect) ); +} + +void IOPMrootDomain::sendClientClamshellNotification ( void ) +{ + /* Only broadcast clamshell alert if clamshell exists. */ + if(!clamshellExists) + return; + + setProperty(kAppleClamshellStateKey, + clamshellIsClosed ? kOSBooleanTrue : kOSBooleanFalse); + + setProperty(kAppleClamshellCausesSleepKey, + shouldSleepOnClamshellClosed() ? kOSBooleanTrue : kOSBooleanFalse); + + + /* Argument to message is a bitfiel of + * ( kClamshellStateBit | kClamshellSleepBit ) + */ + messageClients(kIOPMMessageClamshellStateChange, + (void *) ( (clamshellIsClosed ? kClamshellStateBit : 0) + | ( shouldSleepOnClamshellClosed() ? kClamshellSleepBit : 0)) ); +} + +//****************************************************************************** +// receivePowerNotification +// +// The power controller is notifying us of a hardware-related power management +// event that we must handle. This is a result of an 'environment' interrupt from +// the power mgt micro. +//****************************************************************************** + +IOReturn IOPMrootDomain::receivePowerNotification (UInt32 msg) +{ + bool eval_clamshell = false; + + /* + * Local (IOPMrootDomain only) eval clamshell command + */ + if (msg & kLocalEvalClamshellCommand) { - ignoringClamshell = false; + eval_clamshell = true; } - if (msg & kIOPMDisableClamshell) + + /* + * Overtemp + */ + if (msg & kIOPMOverTemp) { - ignoringClamshell = true; + IOLog("PowerManagement emergency overtemp signal. 
Going to sleep!"); + (void) sleepSystem (); } + /* + * PMU Processor Speed Change + */ if (msg & kIOPMProcessorSpeedChange) { IOService *pmu = waitForService(serviceMatching("ApplePMU")); @@ -898,26 +1321,138 @@ IOReturn IOPMrootDomain::receivePowerNotification (UInt32 msg) pmu->callPlatformFunction("recoverFromSleep", false, 0, 0, 0, 0); } + /* + * Sleep Now! + */ if (msg & kIOPMSleepNow) { (void) sleepSystem (); } + /* + * Power Emergency + */ if (msg & kIOPMPowerEmergency) { (void) sleepSystem (); } - if (msg & kIOPMClamshellClosed) + + /* + * Clamshell OPEN + */ + if (msg & kIOPMClamshellOpened) + { + // Received clamshel open message from clamshell controlling driver + // Update our internal state and tell general interest clients + clamshellIsClosed = false; + clamshellExists = true; + + sendClientClamshellNotification(); + } + + /* + * Clamshell CLOSED + * Send the clamshell interest notification since the lid is closing. + */ + if (msg & kIOPMClamshellClosed) + { + // Received clamshel open message from clamshell controlling driver + // Update our internal state and tell general interest clients + clamshellIsClosed = true; + clamshellExists = true; + + sendClientClamshellNotification(); + + // And set eval_clamshell = so we can attempt + eval_clamshell = true; + } + + /* + * Set Desktop mode (sent from graphics) + * + * -> reevaluate lid state + */ + if (msg & kIOPMSetDesktopMode) + { + desktopMode = (0 != (msg & kIOPMSetValue)); + msg &= ~(kIOPMSetDesktopMode | kIOPMSetValue); + + sendClientClamshellNotification(); + + // Re-evaluate the lid state + if( clamshellIsClosed ) + { + eval_clamshell = true; + } + } + + /* + * AC Adaptor connected + * + * -> reevaluate lid state + */ + if (msg & kIOPMSetACAdaptorConnected) { - if ( !ignoringClamshell && !ignoringClamshellDuringWakeup - && (!desktopMode || !acAdaptorConnect) ) + acAdaptorConnect = (0 != (msg & kIOPMSetValue)); + msg &= ~(kIOPMSetACAdaptorConnected | kIOPMSetValue); + + 
sendClientClamshellNotification(); + + // Re-evaluate the lid state + if( clamshellIsClosed ) { + eval_clamshell = true; + } - (void) sleepSystem (); + } + + /* + * Enable Clamshell (external display disappear) + * + * -> reevaluate lid state + */ + if (msg & kIOPMEnableClamshell) + { + // Re-evaluate the lid state + // System should sleep on external display disappearance + // in lid closed operation. + if( clamshellIsClosed && (true == ignoringClamshell) ) + { + eval_clamshell = true; } + + ignoringClamshell = false; + + sendClientClamshellNotification(); + } + + /* + * Disable Clamshell (external display appeared) + * We don't bother re-evaluating clamshell state. If the system is awake, + * the lid is probably open. + */ + if (msg & kIOPMDisableClamshell) + { + ignoringClamshell = true; + + sendClientClamshellNotification(); + } + + /* + * Evaluate clamshell and SLEEP if appropiate + */ + if ( eval_clamshell && shouldSleepOnClamshellClosed() ) + { + + + // SLEEP! + sleepSystem(); } + /* + * Power Button + */ if (msg & kIOPMPowerButton) { // toggle state of sleep/wake @@ -930,23 +1465,29 @@ IOReturn IOPMrootDomain::receivePowerNotification (UInt32 msg) reportUserInput(); } else { + OSString *pbs = OSString::withCString("DisablePowerButtonSleep"); // Check that power button sleep is enabled - if(kOSBooleanTrue != getProperty(OSString::withCString("DisablePowerButtonSleep"))) + if( pbs ) { + if( kOSBooleanTrue != getProperty(pbs)) sleepSystem(); + } } } - // if the case has been closed, we allow - // the machine to be put to sleep or to idle sleep - + /* + * Allow Sleep + * + */ if ( (msg & kIOPMAllowSleep) && !allowSleep ) { allowSleep = true; adjustPowerState(); } - // if the case has been opened, we disallow sleep/doze - + /* + * Prevent Sleep + * + */ if (msg & kIOPMPreventSleep) { allowSleep = false; // are we dozing? 
@@ -978,9 +1519,12 @@ void IOPMrootDomain::setSleepSupported( IOOptionBits flags ) { canSleep = false; } else { + canSleep = true; platformSleepSupport = flags; } + setProperty(kIOSleepSupportedKey, canSleep); + } //********************************************************************************* @@ -1078,10 +1622,23 @@ bool IOPMrootDomain::tellChangeDown ( unsigned long stateNum ) switch ( stateNum ) { case DOZE_STATE: case SLEEP_STATE: + + // Direct callout into OSMetaClass so it can disable kmod unloads + // during sleep/wake to prevent deadlocks. + OSMetaClassSystemSleepOrWake( kIOMessageSystemWillSleep ); + return super::tellClientsWithResponse(kIOMessageSystemWillSleep); case RESTART_STATE: + // Unsupported shutdown ordering hack on RESTART only + // For Bluetooth and USB (4368327) + super::tellClients(iokit_common_msg(0x759)); + return super::tellClientsWithResponse(kIOMessageSystemWillRestart); case OFF_STATE: + // Unsupported shutdown ordering hack on SHUTDOWN only + // For Bluetooth and USB (4554440) + super::tellClients(iokit_common_msg(0x749)); + return super::tellClientsWithResponse(kIOMessageSystemWillPowerOff); } // this shouldn't execute @@ -1135,6 +1692,10 @@ void IOPMrootDomain::tellChangeUp ( unsigned long stateNum) { if ( stateNum == ON_STATE ) { + // Direct callout into OSMetaClass so it can disable kmod unloads + // during sleep/wake to prevent deadlocks. + OSMetaClassSystemSleepOrWake( kIOMessageSystemHasPoweredOn ); + IOHibernateSystemPostWake(); return tellClients(kIOMessageSystemHasPoweredOn); } @@ -1196,6 +1757,21 @@ IOReturn IOPMrootDomain::changePowerStateTo ( unsigned long ordinal ) IOReturn IOPMrootDomain::changePowerStateToPriv ( unsigned long ordinal ) { + IOReturn ret; + + if( SLEEP_STATE == ordinal && sleepSupportedPEFunction ) + { + + // Determine if the machine supports sleep, or must doze. 
+ ret = getPlatform()->callPlatformFunction( + sleepSupportedPEFunction, false, + NULL, NULL, NULL, NULL); + + // If the machine only supports doze, the callPlatformFunction call + // boils down toIOPMrootDomain::setSleepSupported(kPCICantSleep), + // otherwise nothing. + } + return super::changePowerStateToPriv(ordinal); } @@ -1355,10 +1931,13 @@ IOReturn IOPMrootDomain::displayWranglerNotification( void * target, void * refC // //********************************************************************************* -bool IOPMrootDomain::displayWranglerPublished( void * target, void * refCon, - IOService * newService) +bool IOPMrootDomain::displayWranglerPublished( + void * target, + void * refCon, + IOService * newService) { - IOPMrootDomain * rootDomain = OSDynamicCast(IOPMrootDomain, (IOService *)target); + IOPMrootDomain *rootDomain = + OSDynamicCast(IOPMrootDomain, (IOService *)target); if(!rootDomain) return false; @@ -1366,40 +1945,35 @@ bool IOPMrootDomain::displayWranglerPublished( void * target, void * refCon, rootDomain->wrangler = newService; // we found the display wrangler, now install a handler - if( !rootDomain->wrangler->registerInterest( gIOGeneralInterest, &displayWranglerNotification, target, 0) ) { - IOLog("IOPMrootDomain::displayWranglerPublished registerInterest failed\n"); + if( !rootDomain->wrangler->registerInterest( gIOGeneralInterest, + &displayWranglerNotification, target, 0) ) + { return false; } return true; } + //********************************************************************************* -// batteryLocationPublished +// batteryPublished // -// Notification on AppleSMU publishing location of battery data +// Notification on battery class IOPowerSource appearance // -//********************************************************************************* - -bool IOPMrootDomain::batteryLocationPublished( void * target, void * root_domain, - IOService * resourceService ) -{ - IORegistryEntry *battery_location; - - battery_location = 
(IORegistryEntry *) resourceService->getProperty("battery"); - if (!battery_location || !OSDynamicCast(IORegistryEntry, battery_location)) - return (true); - - ((IOPMrootDomain *)root_domain)->setProperty("BatteryEntry", battery_location); - - // rdar://2936060 +//****************************************************************************** + +bool IOPMrootDomain::batteryPublished( + void * target, + void * root_domain, + IOService * resourceService ) +{ + // rdar://2936060&4435589 // All laptops have dimmable LCD displays // All laptops have batteries // So if this machine has a battery, publish the fact that the backlight // supports dimming. ((IOPMrootDomain *)root_domain)->publishFeature("DisplayDims"); - - ((IOPMrootDomain *)root_domain)->announcePowerSourceChange(); + return (true); } @@ -1447,6 +2021,117 @@ void IOPMrootDomain::adjustPowerState( void ) } } +/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ + + + +#undef super +#define super OSObject +OSDefineMetaClassAndStructors(PMSettingObject, OSObject) + +void PMSettingObject::setPMSetting(const OSSymbol *type, OSObject *obj) +{ + (*func)(target, type, obj, refcon); +} + +/* + * Static constructor/initializer for PMSettingObject + */ +PMSettingObject *PMSettingObject::pmSettingObject( + IOPMrootDomain *parent_arg, + IOPMSettingControllerCallback handler_arg, + OSObject *target_arg, + uintptr_t refcon_arg, + uint32_t supportedPowerSources, + const OSSymbol * settings[]) +{ + uint32_t objCount = 0; + PMSettingObject *pmso; + + if( !parent_arg || !handler_arg || !settings ) return NULL; + + // count OSSymbol entries in NULL terminated settings array + while( settings[objCount] ) { + objCount++; + } + if(0 == objCount) return NULL; + + pmso = new PMSettingObject; + if(!pmso || !pmso->init()) return NULL; + + pmso->parent = parent_arg; + pmso->func = handler_arg; + pmso->target = target_arg; + pmso->refcon = refcon_arg; + pmso->releaseAtCount = objCount + 1; // release when it has 
count+1 retains + + pmso->publishedFeatureID = (uint32_t *)IOMalloc(sizeof(uint32_t)*objCount); + if(pmso->publishedFeatureID) { + for(unsigned int i=0; ipublishFeature( settings[i]->getCStringNoCopy(), + supportedPowerSources, &pmso->publishedFeatureID[i] ); + } + } + + return pmso; +} + +void PMSettingObject::free(void) +{ + OSCollectionIterator *settings_iter; + OSSymbol *sym; + OSArray *arr; + int arr_idx; + int i; + int objCount = releaseAtCount - 1; + + if(publishedFeatureID) { + for(i=0; iremovePublishedFeature( publishedFeatureID[i] ); + } + } + + IOFree(publishedFeatureID, sizeof(uint32_t) * objCount); + } + + IORecursiveLockLock(parent->settingsCtrlLock); + + // Search each PM settings array in the kernel. + settings_iter = OSCollectionIterator::withCollection(parent->settingsCallbacks); + if(settings_iter) + { + while(( sym = OSDynamicCast(OSSymbol, settings_iter->getNextObject()) )) + { + arr = (OSArray *)parent->settingsCallbacks->getObject(sym); + arr_idx = arr->getNextIndexOfObject(this, 0); + if(-1 != arr_idx) { + // 'this' was found in the array; remove it + arr->removeObject(arr_idx); + } + } + + settings_iter->release(); + } + + IORecursiveLockUnlock(parent->settingsCtrlLock); + + super::free(); +} + +void PMSettingObject::taggedRelease(const void *tag, const int when) const +{ + // We have n+1 retains - 1 per array that this PMSettingObject is a member + // of, and 1 retain to ourself. 
When we get a release with n+1 retains + // remaining, we go ahead and free ourselves, cleaning up array pointers + // in free(); + + super::taggedRelease(tag, releaseAtCount); +} + + /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ @@ -1526,3 +2211,23 @@ void IORootParent::wakeSystem ( void ) changePowerStateToPriv(ON_STATE); } +IOReturn IORootParent::changePowerStateToPriv ( unsigned long ordinal ) +{ + IOReturn ret; + + if( SLEEP_STATE == ordinal && sleepSupportedPEFunction ) + { + + // Determine if the machine supports sleep, or must doze. + ret = getPlatform()->callPlatformFunction( + sleepSupportedPEFunction, false, + NULL, NULL, NULL, NULL); + + // If the machine only supports doze, the callPlatformFunction call + // boils down toIOPMrootDomain::setSleepSupported(kPCICantSleep), + // otherwise nothing. + } + + return super::changePowerStateToPriv(ordinal); +} + diff --git a/iokit/Kernel/IOPlatformExpert.cpp b/iokit/Kernel/IOPlatformExpert.cpp index 1fa9a6ec7..6d85f2748 100644 --- a/iokit/Kernel/IOPlatformExpert.cpp +++ b/iokit/Kernel/IOPlatformExpert.cpp @@ -45,6 +45,9 @@ extern "C" { #include } +/* Delay period for UPS halt */ +#define kUPSDelayHaltCPU_msec (1000*60*5) + void printDictionaryKeys (OSDictionary * inDictionary, char * inMsg); static void getCStringForObject (OSObject * inObj, char * outStr); @@ -240,17 +243,15 @@ int (*PE_halt_restart)(unsigned int type) = 0; int IOPlatformExpert::haltRestart(unsigned int type) { - IOPMrootDomain *rd = getPMRootDomain(); - OSBoolean *b = 0; - - if(rd) b = (OSBoolean *)OSDynamicCast(OSBoolean, rd->getProperty(OSString::withCString("StallSystemAtHalt"))); - if (type == kPEHangCPU) while (1); - if (kOSBooleanTrue == b) { - // Stall shutdown for 5 minutes, and if no outside force has removed our power, continue with - // a reboot. 
- IOSleep(1000*60*5); + if (type == kPEUPSDelayHaltCPU) { + // Stall shutdown for 5 minutes, and if no outside force has + // removed our power at that point, proceed with a reboot. + IOSleep( kUPSDelayHaltCPU_msec ); + + // Ideally we never reach this point. + type = kPERestartCPU; } @@ -724,15 +725,17 @@ static void getCStringForObject (OSObject * inObj, char * outStr) } } -/* IOPMPanicOnShutdownHang +/* IOShutdownNotificationsTimedOut * - Called from a timer installed by PEHaltRestart */ -static void IOPMPanicOnShutdownHang(thread_call_param_t p0, thread_call_param_t p1) +static void IOShutdownNotificationsTimedOut( + thread_call_param_t p0, + thread_call_param_t p1) { int type = (int)p0; /* 30 seconds has elapsed - resume shutdown */ - gIOPlatform->haltRestart(type); + if(gIOPlatform) gIOPlatform->haltRestart(type); } @@ -772,8 +775,9 @@ int PEHaltRestart(unsigned int type) bool noWaitForResponses; AbsoluteTime deadline; thread_call_t shutdown_hang; + unsigned int tell_type; - if(type == kPEHaltCPU || type == kPERestartCPU) + if(type == kPEHaltCPU || type == kPERestartCPU || type == kPEUPSDelayHaltCPU) { /* Notify IOKit PM clients of shutdown/restart Clients subscribe to this message with a call to @@ -784,11 +788,19 @@ int PEHaltRestart(unsigned int type) If all goes well the machine will be off by the time the timer expires. */ - shutdown_hang = thread_call_allocate( &IOPMPanicOnShutdownHang, (thread_call_param_t) type); + shutdown_hang = thread_call_allocate( &IOShutdownNotificationsTimedOut, + (thread_call_param_t) type); clock_interval_to_deadline( 30, kSecondScale, &deadline ); thread_call_enter1_delayed( shutdown_hang, 0, deadline ); - noWaitForResponses = pmRootDomain->tellChangeDown2(type); + + if( kPEUPSDelayHaltCPU == type ) { + tell_type = kPEHaltCPU; + } else { + tell_type = type; + } + + noWaitForResponses = pmRootDomain->tellChangeDown2(tell_type); /* This notification should have few clients who all do their work synchronously. 
@@ -812,16 +824,18 @@ UInt32 PESavePanicInfo(UInt8 *buffer, UInt32 length) long PEGetGMTTimeOfDay(void) { + long result = 0; + if( gIOPlatform) - return( gIOPlatform->getGMTTimeOfDay()); - else - return( 0 ); + result = gIOPlatform->getGMTTimeOfDay(); + + return (result); } void PESetGMTTimeOfDay(long secs) { if( gIOPlatform) - gIOPlatform->setGMTTimeOfDay(secs); + gIOPlatform->setGMTTimeOfDay(secs); } } /* extern "C" */ @@ -1214,6 +1228,12 @@ void IOPlatformExpertDevice::free() workLoop->release(); } +bool IOPlatformExpertDevice::attachToChild( IORegistryEntry * child, + const IORegistryPlane * plane ) +{ + return IOService::attachToChild( child, plane ); +} + /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ #undef super diff --git a/iokit/Kernel/IORegistryEntry.cpp b/iokit/Kernel/IORegistryEntry.cpp index 78dbf3527..4be7e3aa5 100644 --- a/iokit/Kernel/IORegistryEntry.cpp +++ b/iokit/Kernel/IORegistryEntry.cpp @@ -87,9 +87,9 @@ OSDefineMetaClassAndStructors(IORegistryPlane, OSObject) static IORecursiveLock * gPropertiesLock; static SInt32 gIORegistryGenerationCount; -#define UNLOCK s_lock_done( &gIORegistryLock ) -#define RLOCK s_lock_read( &gIORegistryLock ) -#define WLOCK s_lock_write( &gIORegistryLock ); \ +#define UNLOCK lck_rw_done( &gIORegistryLock ) +#define RLOCK lck_rw_lock_shared( &gIORegistryLock ) +#define WLOCK lck_rw_lock_exclusive( &gIORegistryLock ); \ gIORegistryGenerationCount++ // make atomic @@ -108,170 +108,11 @@ static SInt32 gIORegistryGenerationCount; /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ -struct s_lock_t { - lck_spin_t interlock; /* "hardware" interlock field */ - volatile unsigned int - read_count:16, /* No. of accepted readers */ - want_upgrade:1, /* Read-to-write upgrade waiting */ - want_write:1, /* Writer is waiting, or - locked for write */ - waiting:1, /* Someone is sleeping on lock */ - can_sleep:1; /* Can attempts to lock go to sleep? 
*/ -}; - -static struct s_lock_t gIORegistryLock; - -/* Time we loop without holding the interlock. - * The former is for when we cannot sleep, the latter - * for when our thread can go to sleep (loop less) - * we shouldn't retake the interlock at all frequently - * if we cannot go to sleep, since it interferes with - * any other processors. In particular, 100 is too small - * a number for powerpc MP systems because of cache - * coherency issues and differing lock fetch times between - * the processors - */ -static unsigned int lock_wait_time[2] = { (unsigned int)-1, 100 } ; - -static void -s_lock_init( - s_lock_t *l, - boolean_t can_sleep) -{ - (void) memset((void *) l, 0, sizeof(s_lock_t)); - - lck_spin_init(&l->interlock, IOLockGroup, LCK_ATTR_NULL); - l->want_write = FALSE; - l->want_upgrade = FALSE; - l->read_count = 0; - l->can_sleep = can_sleep; -} - -static void -s_lock_write( - register s_lock_t * l) -{ - register int i; - - lck_spin_lock(&l->interlock); - - /* - * Try to acquire the want_write bit. - */ - while (l->want_write) { - - i = lock_wait_time[l->can_sleep ? 1 : 0]; - if (i != 0) { - lck_spin_unlock(&l->interlock); - while (--i != 0 && l->want_write) - continue; - lck_spin_lock(&l->interlock); - } - - if (l->can_sleep && l->want_write) { - l->waiting = TRUE; - lck_spin_sleep( &l->interlock, LCK_SLEEP_DEFAULT, - (event_t) l, THREAD_UNINT); - /* interlock relocked */ - } - } - l->want_write = TRUE; - - /* Wait for readers (and upgrades) to finish */ - - while ((l->read_count != 0) || l->want_upgrade) { - - i = lock_wait_time[l->can_sleep ? 
1 : 0]; - if (i != 0) { - lck_spin_unlock(&l->interlock); - while (--i != 0 && (l->read_count != 0 || - l->want_upgrade)) - continue; - lck_spin_lock(&l->interlock); - } - - if (l->can_sleep && (l->read_count != 0 || l->want_upgrade)) { - l->waiting = TRUE; - lck_spin_sleep( &l->interlock, LCK_SLEEP_DEFAULT, - (event_t) l, THREAD_UNINT); - /* interlock relocked */ - } - } - - lck_spin_unlock(&l->interlock); -} - -static void -s_lock_done( - register s_lock_t * l) -{ - boolean_t do_wakeup = FALSE; - - lck_spin_lock(&l->interlock); - - if (l->read_count != 0) { - l->read_count -= 1; - } - else { - if (l->want_upgrade) { - l->want_upgrade = FALSE; - } - else { - l->want_write = FALSE; - } - } - - /* - * There is no reason to wakeup a waiting thread - * if the read-count is non-zero. Consider: - * we must be dropping a read lock - * threads are waiting only if one wants a write lock - * if there are still readers, they can't proceed - */ - if (l->waiting && (l->read_count == 0)) { - l->waiting = FALSE; - do_wakeup = TRUE; - } - - lck_spin_unlock(&l->interlock); - - if (do_wakeup) - thread_wakeup((event_t) l); -} - -static void -s_lock_read( - register s_lock_t * l) -{ - register int i; +lck_rw_t gIORegistryLock; +lck_grp_t *gIORegistryLockGrp; +lck_grp_attr_t *gIORegistryLockGrpAttr; +lck_attr_t *gIORegistryLockAttr; - lck_spin_lock(&l->interlock); - - while ( l->want_upgrade || ((0 == l->read_count) && l->want_write )) { - - i = lock_wait_time[l->can_sleep ? 
1 : 0]; - - if (i != 0) { - lck_spin_unlock(&l->interlock); - while (--i != 0 && - (l->want_upgrade || ((0 == l->read_count) && l->want_write ))) - continue; - lck_spin_lock(&l->interlock); - } - - if (l->can_sleep && - (l->want_upgrade || ((0 == l->read_count) && l->want_write ))) { - l->waiting = TRUE; - lck_spin_sleep( &l->interlock, LCK_SLEEP_DEFAULT, - (event_t) l, THREAD_UNINT); - /* interlock relocked */ - } - } - - l->read_count += 1; - lck_spin_unlock(&l->interlock); - -} /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ @@ -281,7 +122,15 @@ IORegistryEntry * IORegistryEntry::initialize( void ) if( !gRegistryRoot) { - s_lock_init( &gIORegistryLock, true ); + + gIORegistryLockGrpAttr = lck_grp_attr_alloc_init(); + //lck_grp_attr_setstat(gIORegistryLockGrpAttr); + gIORegistryLockGrp = lck_grp_alloc_init("IORegistryLock", gIORegistryLockGrpAttr); + gIORegistryLockAttr = lck_attr_alloc_init(); + lck_attr_rw_shared_priority(gIORegistryLockAttr); + //lck_attr_setdebug(gIORegistryLockAttr); + lck_rw_init( &gIORegistryLock, gIORegistryLockGrp, gIORegistryLockAttr); + gRegistryRoot = new IORegistryEntry; gPropertiesLock = IORecursiveLockAlloc(); gIORegistryPlanes = OSDictionary::withCapacity( 1 ); diff --git a/iokit/Kernel/IOService.cpp b/iokit/Kernel/IOService.cpp index 781fad86c..6fbc9e75e 100644 --- a/iokit/Kernel/IOService.cpp +++ b/iokit/Kernel/IOService.cpp @@ -191,6 +191,22 @@ bool IOService::isInactive( void ) const /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ +#if __i386__ + +// Only used by the intel implementation of +// IOService::requireMaxBusStall(UInt32 __unused ns) +struct BusStallEntry +{ + const IOService *fService; + UInt32 fMaxDelay; +}; + +static OSData *sBusStall = OSData::withCapacity(8 * sizeof(BusStallEntry)); +static IOLock *sBusStallLock = IOLockAlloc(); +#endif /* __i386__ */ + +/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ + void 
IOService::initialize( void ) { kern_return_t err; @@ -330,6 +346,7 @@ void IOService::stop( IOService * provider ) void IOService::free( void ) { + requireMaxBusStall(0); if( getPropertyTable()) unregisterAllInterest(); PMfree(); @@ -484,6 +501,8 @@ void IOService::startMatching( IOOptionBits options ) || ((provider = getProvider()) && (provider->__state[1] & kIOServiceSynchronousState)); + if ( options & kIOServiceAsynchronous ) + sync = false; needConfig = (0 == (__state[1] & (kIOServiceNeedConfigState | kIOServiceConfigState))) && (0 == (__state[0] & kIOServiceInactiveState)); @@ -1278,18 +1297,18 @@ IOReturn IOService::messageClient( UInt32 type, OSObject * client, return( ret ); } -void IOService::applyToInterested( const OSSymbol * typeOfInterest, - OSObjectApplierFunction applier, - void * context ) +static void +applyToInterestNotifiers(const IORegistryEntry *target, + const OSSymbol * typeOfInterest, + OSObjectApplierFunction applier, + void * context ) { OSArray * copyArray = 0; - applyToClients( (IOServiceApplierFunction) applier, context ); - LOCKREADNOTIFY(); IOCommand *notifyList = - OSDynamicCast( IOCommand, getProperty( typeOfInterest )); + OSDynamicCast( IOCommand, target->getProperty( typeOfInterest )); if( notifyList) { copyArray = OSArray::withCapacity(1); @@ -1314,6 +1333,14 @@ void IOService::applyToInterested( const OSSymbol * typeOfInterest, } } +void IOService::applyToInterested( const OSSymbol * typeOfInterest, + OSObjectApplierFunction applier, + void * context ) +{ + applyToClients( (IOServiceApplierFunction) applier, context ); + applyToInterestNotifiers(this, typeOfInterest, applier, context); +} + struct MessageClientsContext { IOService * service; UInt32 type; @@ -2881,21 +2908,15 @@ UInt32 IOService::_adjustBusy( SInt32 delta ) next->unlockForArbitration(); if( (wasQuiet || nowQuiet) ) { - OSArray * array; - unsigned int index; - OSObject * interested; - - array = OSDynamicCast( OSArray, next->getProperty( gIOBusyInterest )); - if( 
array) { - LOCKREADNOTIFY(); - for( index = 0; - (interested = array->getObject( index )); - index++) { - next->messageClient(kIOMessageServiceBusyStateChange, - interested, (void *) wasQuiet /* busy now */); - } - UNLOCKNOTIFY(); - } + MessageClientsContext context; + + context.service = next; + context.type = kIOMessageServiceBusyStateChange; + context.argument = (void *) wasQuiet; // busy now + context.argSize = 0; + + applyToInterestNotifiers( next, gIOBusyInterest, + &messageClientsApplier, &context ); if( nowQuiet && (next == gIOServiceRoot)) OSMetaClass::considerUnloads(); @@ -4010,6 +4031,9 @@ IOReturn IOService::newUserClient( task_t owningTask, void * securityID, IOUserClient *client; OSObject *temp; + if (kIOReturnSuccess == newUserClient( owningTask, securityID, type, handler )) + return kIOReturnSuccess; + // First try my own properties for a user client class name temp = getProperty(gIOUserClientClassKey); if (temp) { @@ -4027,6 +4051,7 @@ IOReturn IOService::newUserClient( task_t owningTask, void * securityID, if (!userClientClass) return kIOReturnUnsupported; + // This reference is consumed by the IOServiceOpen call temp = OSMetaClass::allocClassWithName(userClientClass); if (!temp) return kIOReturnNoMemory; @@ -4061,7 +4086,7 @@ IOReturn IOService::newUserClient( task_t owningTask, void * securityID, IOReturn IOService::newUserClient( task_t owningTask, void * securityID, UInt32 type, IOUserClient ** handler ) { - return( newUserClient( owningTask, securityID, type, 0, handler )); + return( kIOReturnUnsupported ); } IOReturn IOService::requestProbe( IOOptionBits options ) @@ -4296,6 +4321,92 @@ void IOService::setDeviceMemory( OSArray * array ) setProperty( gIODeviceMemoryKey, array); } +/* + * For machines where the transfers on an I/O bus can stall because + * the CPU is in an idle mode, These APIs allow a driver to specify + * the maximum bus stall that they can handle. 0 indicates no limit. 
+ */ +void IOService:: +setCPUSnoopDelay(UInt32 __unused ns) +{ +#if __i386__ + ml_set_maxsnoop(ns); +#endif /* __i386__ */ +} + +UInt32 IOService:: +getCPUSnoopDelay() +{ +#if __i386__ + return ml_get_maxsnoop(); +#else + return 0; +#endif /* __i386__ */ +} + +void IOService:: +requireMaxBusStall(UInt32 __unused ns) +{ +#if __i386__ + static const UInt kNoReplace = -1U; // Must be an illegal index + UInt replace = kNoReplace; + + IOLockLock(sBusStallLock); + + UInt count = sBusStall->getLength() / sizeof(BusStallEntry); + BusStallEntry *entries = (BusStallEntry *) sBusStall->getBytesNoCopy(); + + if (ns) { + const BusStallEntry ne = {this, ns}; + + // Set Maximum bus delay. + for (UInt i = 0; i < count; i++) { + const IOService *thisService = entries[i].fService; + if (this == thisService) + replace = i; + else if (!thisService) { + if (kNoReplace == replace) + replace = i; + } + else { + const UInt32 thisMax = entries[i].fMaxDelay; + if (thisMax < ns) + ns = thisMax; + } + } + + // Must be safe to call from locked context + ml_set_maxbusdelay(ns); + + if (kNoReplace == replace) + sBusStall->appendBytes(&ne, sizeof(ne)); + else + entries[replace] = ne; + } + else { + ns = -1U; // Set to max unsigned, i.e. no restriction + + for (UInt i = 0; i < count; i++) { + // Clear a maximum bus delay. 
+ const IOService *thisService = entries[i].fService; + UInt32 thisMax = entries[i].fMaxDelay; + if (this == thisService) + replace = i; + else if (thisService && thisMax < ns) + ns = thisMax; + } + + // Check if entry found + if (kNoReplace != replace) { + entries[replace].fService = 0; // Null the entry + ml_set_maxbusdelay(ns); + } + } + + IOLockUnlock(sBusStallLock); +#endif /* __i386__ */ +} + /* * Device interrupts */ @@ -4501,6 +4612,8 @@ OSMetaClassDefineReservedUnused(IOService, 44); OSMetaClassDefineReservedUnused(IOService, 45); OSMetaClassDefineReservedUnused(IOService, 46); OSMetaClassDefineReservedUnused(IOService, 47); + +#ifdef __ppc__ OSMetaClassDefineReservedUnused(IOService, 48); OSMetaClassDefineReservedUnused(IOService, 49); OSMetaClassDefineReservedUnused(IOService, 50); @@ -4517,3 +4630,4 @@ OSMetaClassDefineReservedUnused(IOService, 60); OSMetaClassDefineReservedUnused(IOService, 61); OSMetaClassDefineReservedUnused(IOService, 62); OSMetaClassDefineReservedUnused(IOService, 63); +#endif diff --git a/iokit/Kernel/IOServicePM.cpp b/iokit/Kernel/IOServicePM.cpp index a20e0a6a0..7749a30ea 100644 --- a/iokit/Kernel/IOServicePM.cpp +++ b/iokit/Kernel/IOServicePM.cpp @@ -585,7 +585,7 @@ IOReturn IOService::addPowerChild ( IOService * theChild ) // Put ourselves into a usable power state. // We must be in an "on" power state, as our children must be able to access // our hardware after joining the power plane. 
- makeUsable(); + temporaryMakeUsable(); // make a nub connection = new IOPowerConnection; @@ -631,14 +631,13 @@ IOReturn IOService::addPowerChild ( IOService * theChild ) } -//********************************************************************************* +//****************************************************************************** // removePowerChild // -//********************************************************************************* +//****************************************************************************** IOReturn IOService::removePowerChild ( IOPowerConnection * theNub ) { IORegistryEntry *theChild; - OSIterator *iter; pm_vars->thePlatform->PMLog(pm_vars->ourName,PMlogRemoveChild,0,0); @@ -693,25 +692,13 @@ IOReturn IOService::removePowerChild ( IOPowerConnection * theNub ) return IOPMNoErr; } - // Perhaps the departing child was holding up idle or system sleep - we need to re-evaluate our - // childrens' requests. Clear and re-calculate our kIOPMChildClamp and kIOPMChildClamp2 bits. + // Perhaps the departing child was holding up idle or system sleep - we + // need to re-evaluate our childrens' requests. + // Clear and re-calculate our kIOPMChildClamp and kIOPMChildClamp2 bits. 
rebuildChildClampBits(); - if(!priv->clampOn) - { - // count children - iter = getChildIterator(gIOPowerPlane); - if ( !iter || !iter->getNextObject() ) - { - // paired to match the makeUsable() call in addPowerChild() - changePowerStateToPriv(0); - } - if(iter) iter->release(); - } - - // this may be different now + // Change state if we can now tolerate lower power computeDesiredState(); - // change state if we can now tolerate lower power changeState(); return IOPMNoErr; @@ -1458,6 +1445,40 @@ IOReturn IOService::makeUsable ( void ) return IOPMNoErr; } +//****************************************************************************** +// temporaryMakeUsable +// +// Private function, called by IOService::addPowerChild to ensure that the +// device is temporarily in a usable power state so that attached power +// children may properly initialize. +//****************************************************************************** + +IOReturn IOService::temporaryMakeUsable ( void ) +{ + IOReturn ret = kIOReturnSuccess; + unsigned long tempDesire; + + pm_vars->thePlatform->PMLog( pm_vars->ourName, + PMlogMakeUsable, + PMlogMakeUsable, + priv->deviceDesire); + + if ( pm_vars->theControllingDriver == NULL ) + { + priv->need_to_become_usable = true; + return IOPMNoErr; + } + tempDesire = priv->deviceDesire; + priv->deviceDesire = pm_vars->theNumberOfPowerStates - 1; + computeDesiredState(); + if ( inPlane(gIOPowerPlane) && (pm_vars->parentsKnowState) ) + { + ret = changeState(); + } + priv->deviceDesire = tempDesire; + return ret; +} + //********************************************************************************* // currentCapability @@ -1862,9 +1883,13 @@ void IOService::PM_idle_timer_expiration ( void ) } if ( pm_vars->myCurrentState > 0 ) { + + unsigned long newState = pm_vars->myCurrentState - 1; + IOUnlock(priv->activityLock); - changePowerStateToPriv(pm_vars->myCurrentState - 1); - start_PM_idle_timer(); + changePowerStateToPriv(newState); + if ( newState >= 
priv->ourDesiredPowerState ) + start_PM_idle_timer(); return; } IOUnlock(priv->activityLock); @@ -2097,7 +2122,12 @@ IOReturn IOService::powerOverrideOffPriv ( void ) // needn't perform the previous change, so we collapse the list a little. //********************************************************************************* -IOReturn IOService::enqueuePowerChange ( unsigned long flags, unsigned long whatStateOrdinal, unsigned long domainState, IOPowerConnection * whichParent, unsigned long singleParentState ) +IOReturn IOService::enqueuePowerChange ( + unsigned long flags, + unsigned long whatStateOrdinal, + unsigned long domainState, + IOPowerConnection * whichParent, + unsigned long singleParentState ) { long newNote; long previousNote; @@ -2186,26 +2216,35 @@ IOReturn IOService::notifyAll ( bool is_prechange ) OSObject * next; IOPowerConnection * connection; - // To prevent acknowledgePowerChange from finishing the change note and removing it from the queue if - // some driver calls it, we inflate the number of pending acks so it cannot become zero. We'll fix it later. - - priv->head_note_pendingAcks =1; + // To prevent acknowledgePowerChange from finishing the change note and + // removing it from the queue if + // some driver calls it, we inflate the number of pending acks so it + // cannot become zero. We'll fix it later. + + if(!acquire_lock()) return IOPMAckImplied; - // OK, we will go through the lists of interested drivers and power domain children - // and notify each one of this change. + priv->head_note_pendingAcks = 1; + // OK, we will go through the lists of interested drivers and + // power domain children and notify each one of this change. + nextObject = priv->interestedDrivers->firstInList(); while ( nextObject != NULL ) { + priv->head_note_pendingAcks +=1; - if (! 
inform(nextObject, is_prechange) ) + + IOUnlock(priv->our_lock); + + inform(nextObject, is_prechange); + + if(!acquire_lock()) { + goto exit; } + nextObject = priv->interestedDrivers->nextInList(nextObject); } - if (! acquire_lock() ) { - return IOPMNoErr; - } // did they all ack? if ( priv->head_note_pendingAcks > 1 ) { // no @@ -2220,17 +2259,26 @@ IOReturn IOService::notifyAll ( bool is_prechange ) // summing their power consumption pm_vars->thePowerStates[priv->head_note_state].staticPower = 0; - if ( iter ) + if ( iter && acquire_lock()) { while ( (next = iter->getNextObject()) ) { if ( (connection = OSDynamicCast(IOPowerConnection,next)) ) { priv->head_note_pendingAcks +=1; + + IOUnlock(priv->our_lock); + notifyChild(connection, is_prechange); + + if(!acquire_lock()) + { + goto exit; + } } } iter->release(); + IOUnlock(priv->our_lock); } if (! acquire_lock() ) { @@ -2248,6 +2296,9 @@ IOReturn IOService::notifyAll ( bool is_prechange ) // not all acked IOUnlock(priv->our_lock); + +exit: // unable to acquire_lock exit case + return IOPMWillAckLater; } @@ -2272,7 +2323,12 @@ bool IOService::notifyChild ( IOPowerConnection * theNub, bool is_prechange ) // The child has been detached since we grabbed the child iterator. // Decrement pending_acks, already incremented in notifyAll, // to account for this unexpected departure. 
- priv->head_note_pendingAcks--; + + if( acquire_lock() ) + { + priv->head_note_pendingAcks--; + IOUnlock(priv->our_lock); + } return true; } @@ -2291,7 +2347,12 @@ bool IOService::notifyChild ( IOPowerConnection * theNub, bool is_prechange ) if ( k == IOPMAckImplied ) { // yes - priv->head_note_pendingAcks--; + if( acquire_lock() ) + { + priv->head_note_pendingAcks--; + IOUnlock(priv->our_lock); + } + theNub->setAwaitingAck(false); childPower = theChild->currentPowerConsumption(); if ( childPower == kIOPMUnknown ) @@ -2344,19 +2405,17 @@ bool IOService::inform ( IOPMinformee * nextObject, bool is_prechange ) return true; } - // no, did the return code ack? - if ( k ==IOPMAckImplied ) + if ( (k ==IOPMAckImplied) // no, did the return code ack? + || (k < 0) ) // somebody goofed { // yes nextObject->timer = 0; - priv->head_note_pendingAcks -= 1; - return true; - } - if ( k<0 ) - { - // somebody goofed - nextObject->timer = 0; - priv-> head_note_pendingAcks -= 1; + + if( acquire_lock() ) + { + priv->head_note_pendingAcks--; + IOUnlock(priv->our_lock); + } return true; } @@ -2453,14 +2512,18 @@ void IOService::OurChangeSetPowerState ( void ) { priv->machine_state = kIOPM_OurChangeWaitForPowerSettle; + IOLockLock(priv->our_lock); + if ( instruct_driver(priv->head_note_state) == IOPMAckImplied ) { // it's done, carry on + IOLockUnlock(priv->our_lock); OurChangeWaitForPowerSettle(); } else { // it's not, wait for it pm_vars->thePlatform->PMLog(pm_vars->ourName,PMlogStartAckTimer,0,0); start_ack_timer(); + IOLockUnlock(priv->our_lock); // execution will resume via ack_timer_ticked() } } @@ -2640,14 +2703,18 @@ IOReturn IOService::ParentDownSetPowerState_Immediate ( void ) { priv->machine_state = kIOPM_ParentDownWaitForPowerSettle_Delayed; + IOLockLock(priv->our_lock); + if ( instruct_driver(priv->head_note_state) == IOPMAckImplied ) { // it's done, carry on + IOLockUnlock(priv->our_lock); return ParentDownWaitForPowerSettleAndNotifyDidChange_Immediate(); } // it's not, 
wait for it pm_vars->thePlatform->PMLog(pm_vars->ourName,PMlogStartAckTimer,0,0); start_ack_timer(); + IOLockUnlock(priv->our_lock); return IOPMWillAckLater; } @@ -2668,14 +2735,18 @@ void IOService::ParentDownSetPowerState_Delayed ( void ) { priv-> machine_state = kIOPM_ParentDownWaitForPowerSettle_Delayed; + IOLockLock(priv->our_lock); + if ( instruct_driver(priv->head_note_state) == IOPMAckImplied ) { // it's done, carry on + IOLockUnlock(priv->our_lock); ParentDownWaitForPowerSettle_Delayed(); } else { // it's not, wait for it pm_vars->thePlatform->PMLog(pm_vars->ourName,PMlogStartAckTimer,0,0); start_ack_timer(); + IOLockUnlock(priv->our_lock); } } @@ -2820,14 +2891,18 @@ void IOService::ParentUpSetPowerState_Delayed ( void ) { priv->machine_state = kIOPM_ParentUpWaitForSettleTime_Delayed; + IOLockLock(priv->our_lock); + if ( instruct_driver(priv->head_note_state) == IOPMAckImplied ) { // it did it, carry on + IOLockUnlock(priv->our_lock); ParentUpWaitForSettleTime_Delayed(); } else { // it didn't, wait for it pm_vars->thePlatform->PMLog(pm_vars->ourName,PMlogStartAckTimer,0,0); start_ack_timer(); + IOLockUnlock(priv->our_lock); } } @@ -2847,15 +2922,19 @@ IOReturn IOService::ParentUpSetPowerState_Immediate ( void ) { priv->machine_state = kIOPM_ParentUpWaitForSettleTime_Delayed; + IOLockLock(priv->our_lock); + if ( instruct_driver(priv->head_note_state) == IOPMAckImplied ) { // it did it, carry on + IOLockUnlock(priv->our_lock); return ParentUpWaitForSettleTime_Immediate(); } else { // it didn't, wait for it pm_vars->thePlatform->PMLog(pm_vars->ourName,PMlogStartAckTimer,0,0); start_ack_timer(); + IOLockUnlock(priv->our_lock); return IOPMWillAckLater; } } @@ -3881,9 +3960,11 @@ IOReturn IOService::instruct_driver ( unsigned long newState ) priv->driver_timer = -1; // yes, instruct it + IOLockUnlock(priv->our_lock); OUR_PMLog( kPMLogProgramHardware, (UInt32) this, newState); delay = pm_vars->theControllingDriver->setPowerState( newState,this ); 
OUR_PMLog((UInt32) -kPMLogProgramHardware, (UInt32) this, (UInt32) delay); + IOLockLock(priv->our_lock); // it finished if ( delay == IOPMAckImplied ) diff --git a/iokit/Kernel/IOStartIOKit.cpp b/iokit/Kernel/IOStartIOKit.cpp index 010c14c59..33c3731d8 100644 --- a/iokit/Kernel/IOStartIOKit.cpp +++ b/iokit/Kernel/IOStartIOKit.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 1998-2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 1998-2005 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -45,6 +45,7 @@ extern "C" { extern void OSlibkernInit (void); +extern void ml_hpet_cfg(uint32_t, uint32_t); #include @@ -59,6 +60,7 @@ void IOKitResetTime( void ) #ifndef i386 IOService::waitForService( IOService::resourceMatching("IONVRAM"), &t ); + #endif clock_initialize_calendar(); diff --git a/iokit/Kernel/IOStringFuncs.c b/iokit/Kernel/IOStringFuncs.c index 46e7a246e..f7d9508b5 100644 --- a/iokit/Kernel/IOStringFuncs.c +++ b/iokit/Kernel/IOStringFuncs.c @@ -357,11 +357,10 @@ strtoq(nptr, endptr, base) * Ignores `locale' stuff. Assumes that the upper and lower case * alphabets and digits are each contiguous. 
*/ -u_quad_t -strtouq(nptr, endptr, base) - const char *nptr; - char **endptr; - register int base; +uint64_t +strtouq(const char *nptr, + char **endptr, + register int base) { register const char *s = nptr; register u_quad_t acc; diff --git a/iokit/Kernel/IOUserClient.cpp b/iokit/Kernel/IOUserClient.cpp index 4a3df066f..84a551716 100644 --- a/iokit/Kernel/IOUserClient.cpp +++ b/iokit/Kernel/IOUserClient.cpp @@ -24,7 +24,6 @@ #include #include #include -#include #include #include #include @@ -803,6 +802,22 @@ IOReturn IOUserClient::clientHasPrivilege( void * securityToken, return (kr); } +bool IOUserClient::init() +{ + if( getPropertyTable()) + return true; + else + return super::init(); +} + +bool IOUserClient::init(OSDictionary * dictionary) +{ + if( getPropertyTable()) + return true; + else + return super::init(dictionary); +} + bool IOUserClient::initWithTask(task_t owningTask, void * securityID, UInt32 type ) @@ -1726,6 +1741,7 @@ kern_return_t is_io_registry_entry_get_property_bytes( return( ret ); } + /* Routine io_registry_entry_get_property */ kern_return_t is_io_registry_entry_get_property( io_object_t registry_entry, @@ -1959,7 +1975,7 @@ kern_return_t is_io_service_open( CHECK( IOService, _service, service ); err = service->newUserClient( owningTask, (void *) owningTask, - connect_type, &client ); + connect_type, 0, &client ); if( err == kIOReturnSuccess) { assert( OSDynamicCast(IOUserClient, client) ); @@ -1969,6 +1985,101 @@ kern_return_t is_io_service_open( return( err); } +/* Routine io_service_open_ndr */ +kern_return_t is_io_service_open_extended( + io_object_t _service, + task_t owningTask, + int connect_type, + NDR_record_t ndr, + io_buf_ptr_t properties, + mach_msg_type_number_t propertiesCnt, + natural_t * result, + io_object_t *connection ) +{ + IOUserClient * client = 0; + kern_return_t err = KERN_SUCCESS; + IOReturn res = kIOReturnSuccess; + OSDictionary * propertiesDict = 0; + bool crossEndian; + bool disallowAccess; + + CHECK( IOService, 
_service, service ); + + do + { + if (properties) + { + OSObject * obj; + vm_offset_t data; + vm_map_offset_t map_data; + + err = vm_map_copyout( kernel_map, &map_data, (vm_map_copy_t) properties ); + res = err; + data = CAST_DOWN(vm_offset_t, map_data); + if (KERN_SUCCESS == err) + { + // must return success after vm_map_copyout() succeeds + obj = OSUnserializeXML( (const char *) data ); + vm_deallocate( kernel_map, data, propertiesCnt ); + propertiesDict = OSDynamicCast(OSDictionary, obj); + if (!propertiesDict) + { + res = kIOReturnBadArgument; + if (obj) + obj->release(); + } + } + if (kIOReturnSuccess != res) + break; + } + + crossEndian = (ndr.int_rep != NDR_record.int_rep); + if (crossEndian) + { + if (!propertiesDict) + propertiesDict = OSDictionary::withCapacity(4); + OSData * data = OSData::withBytes(&ndr, sizeof(ndr)); + if (data) + { + if (propertiesDict) + propertiesDict->setObject(kIOUserClientCrossEndianKey, data); + data->release(); + } + } + + res = service->newUserClient( owningTask, (void *) owningTask, + connect_type, propertiesDict, &client ); + + if (propertiesDict) + propertiesDict->release(); + + if (res == kIOReturnSuccess) + { + assert( OSDynamicCast(IOUserClient, client) ); + + disallowAccess = (crossEndian + && (kOSBooleanTrue != service->getProperty(kIOUserClientCrossEndianCompatibleKey)) + && (kOSBooleanTrue != client->getProperty(kIOUserClientCrossEndianCompatibleKey))); + + if (disallowAccess) + { + client->clientClose(); + client->release(); + client = 0; + res = kIOReturnUnsupported; + break; + } + client->sharedInstance = (0 != client->getProperty(kIOUserClientSharedInstanceKey)); + } + } + while (false); + + *connection = client; + *result = res; + + return (err); +} + /* Routine io_service_close */ kern_return_t is_io_service_close( io_object_t connection ) @@ -2035,7 +2146,8 @@ kern_return_t is_io_connect_map_memory( if( mapSize) *mapSize = map->getLength(); - if( task != current_task()) { + if( client->sharedInstance + || 
(task != current_task())) { // push a name out to the task owning the map, // so we can clean up maps #if IOASSERT diff --git a/iokit/Kernel/IOWorkLoop.cpp b/iokit/Kernel/IOWorkLoop.cpp index 4108eaac9..81e03f4ab 100644 --- a/iokit/Kernel/IOWorkLoop.cpp +++ b/iokit/Kernel/IOWorkLoop.cpp @@ -38,8 +38,8 @@ OSDefineMetaClassAndStructors(IOWorkLoop, OSObject); // Block of unused functions intended for future use OSMetaClassDefineReservedUsed(IOWorkLoop, 0); +OSMetaClassDefineReservedUsed(IOWorkLoop, 1); -OSMetaClassDefineReservedUnused(IOWorkLoop, 1); OSMetaClassDefineReservedUnused(IOWorkLoop, 2); OSMetaClassDefineReservedUnused(IOWorkLoop, 3); OSMetaClassDefineReservedUnused(IOWorkLoop, 4); @@ -70,7 +70,8 @@ bool IOWorkLoop::init() return false; controlG = IOCommandGate:: - commandGate(this, (IOCommandGate::Action) &IOWorkLoop::_maintRequest); + commandGate(this, OSMemberFunctionCast(IOCommandGate::Action, + this, &IOWorkLoop::_maintRequest)); if ( !controlG ) return false; @@ -84,7 +85,9 @@ bool IOWorkLoop::init() if (addEventSource(controlG) != kIOReturnSuccess) return false; - workThread = IOCreateThread((thread_continue_t)threadMainContinuation, this); + IOThreadFunc cptr = + OSMemberFunctionCast(IOThreadFunc, this, &IOWorkLoop::threadMain); + workThread = IOCreateThread(cptr, this); if (!workThread) return false; @@ -240,52 +243,55 @@ do { \ #endif /* KDEBUG */ -void IOWorkLoop::threadMainContinuation(IOWorkLoop *self) +/* virtual */ bool IOWorkLoop::runEventSources() { - self->threadMain(); -} - -void IOWorkLoop::threadMain() -{ - CLRP(&fFlags, kLoopRestart); - - for (;;) { - bool more; - IOInterruptState is; + bool res = false; + closeGate(); + if (ISSETP(&fFlags, kLoopTerminate)) + goto abort; IOTimeWorkS(); + bool more; + do { + CLRP(&fFlags, kLoopRestart); + workToDo = more = false; + for (IOEventSource *evnt = eventChain; evnt; evnt = evnt->getNext()) { + + IOTimeClientS(); + more |= evnt->checkForWork(); + IOTimeClientE(); + + if (ISSETP(&fFlags, 
kLoopTerminate)) + goto abort; + else if (fFlags & kLoopRestart) { + more = true; + break; + } + } + } while (more); - closeGate(); - if (ISSETP(&fFlags, kLoopTerminate)) - goto exitThread; - - do { - workToDo = more = false; - for (IOEventSource *event = eventChain; event; event = event->getNext()) { - - IOTimeClientS(); - more |= event->checkForWork(); - IOTimeClientE(); - - if (ISSETP(&fFlags, kLoopTerminate)) - goto exitThread; - else if (fFlags & kLoopRestart) { - CLRP(&fFlags, kLoopRestart); - continue; - } - } - } while (more); - + res = true; IOTimeWorkE(); - openGate(); +abort: + openGate(); + return res; +} + +/* virtual */ void IOWorkLoop::threadMain() +{ + do { + if ( !runEventSources() ) + goto exitThread; - is = IOSimpleLockLockDisableInterrupt(workToDoLock); + IOInterruptState is = IOSimpleLockLockDisableInterrupt(workToDoLock); if ( !ISSETP(&fFlags, kLoopTerminate) && !workToDo) { assert_wait((void *) &workToDo, false); IOSimpleLockUnlockEnableInterrupt(workToDoLock, is); - thread_block_parameter((thread_continue_t)threadMainContinuation, this); + thread_continue_t cptr = OSMemberFunctionCast( + thread_continue_t, this, &IOWorkLoop::threadMain); + thread_block_parameter(cptr, this); /* NOTREACHED */ } @@ -293,11 +299,7 @@ void IOWorkLoop::threadMain() // to commit suicide. 
But no matter // Clear the simple lock and retore the interrupt state IOSimpleLockUnlockEnableInterrupt(workToDoLock, is); - if (workToDo) - continue; - else - break; - } + } while(workToDo); exitThread: workThread = 0; // Say we don't have a loop and free ourselves diff --git a/iokit/Kernel/RootDomainUserClient.cpp b/iokit/Kernel/RootDomainUserClient.cpp index 923831637..47e55b009 100644 --- a/iokit/Kernel/RootDomainUserClient.cpp +++ b/iokit/Kernel/RootDomainUserClient.cpp @@ -26,6 +26,7 @@ #include #include +#include #include #include "RootDomainUserClient.h" #include @@ -38,9 +39,13 @@ OSDefineMetaClassAndStructors(RootDomainUserClient, IOUserClient) /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ -bool RootDomainUserClient::initWithTask(task_t owningTask, void *security_id, UInt32) +bool RootDomainUserClient::initWithTask(task_t owningTask, void *security_id, + UInt32 type, OSDictionary * properties) { - if (!super::init()) + if (properties) + properties->setObject(kIOUserClientCrossEndianCompatibleKey, kOSBooleanTrue); + + if (!super::initWithTask(owningTask, security_id, type, properties)) return false; fOwningTask = owningTask; @@ -125,13 +130,13 @@ RootDomainUserClient::getTargetAndMethodForIndex( IOService ** targetP, UInt32 i { static IOExternalMethod sMethods[] = { { // kPMSetAggressiveness, 0 - 1, (IOMethod)&RootDomainUserClient::secureSetAggressiveness, kIOUCScalarIScalarO, 2, 1 + (IOService *)1, (IOMethod)&RootDomainUserClient::secureSetAggressiveness, kIOUCScalarIScalarO, 2, 1 }, { // kPMGetAggressiveness, 1 0, (IOMethod)&IOPMrootDomain::getAggressiveness, kIOUCScalarIScalarO, 1, 1 }, { // kPMSleepSystem, 2 - 1, (IOMethod)&RootDomainUserClient::secureSleepSystem, kIOUCScalarIScalarO, 0, 1 + (IOService *)1, (IOMethod)&RootDomainUserClient::secureSleepSystem, kIOUCScalarIScalarO, 0, 1 }, { // kPMAllowPowerChange, 3 0, (IOMethod)&IOPMrootDomain::allowPowerChange, kIOUCScalarIScalarO, 1, 0 @@ -146,7 +151,7 @@ 
RootDomainUserClient::getTargetAndMethodForIndex( IOService ** targetP, UInt32 i 0, (IOMethod)&IOPMrootDomain::restartSystem, kIOUCScalarIScalarO, 0, 0 }, { // kPMSetPreventative, 7 - 1, (IOMethod) &RootDomainUserClient::setPreventative, kIOUCScalarIScalarO, 2, 0 + (IOService *)1, (IOMethod)&RootDomainUserClient::setPreventative, kIOUCScalarIScalarO, 2, 0 }, }; diff --git a/iokit/Kernel/RootDomainUserClient.h b/iokit/Kernel/RootDomainUserClient.h index b4172ea26..6af6f379c 100644 --- a/iokit/Kernel/RootDomainUserClient.h +++ b/iokit/Kernel/RootDomainUserClient.h @@ -55,7 +55,8 @@ class RootDomainUserClient : public IOUserClient virtual bool start( IOService * provider ); - virtual bool initWithTask(task_t owningTask, void *security_id, UInt32); + virtual bool initWithTask(task_t owningTask, void *security_id, + UInt32 type, OSDictionary * properties); void setPreventative(UInt32 on_off, UInt32 types_of_sleep); diff --git a/iokit/KernelConfigTables.cpp b/iokit/KernelConfigTables.cpp index 9814f8ac9..2cbe6f5be 100644 --- a/iokit/KernelConfigTables.cpp +++ b/iokit/KernelConfigTables.cpp @@ -71,13 +71,5 @@ const char * gIOKernelConfigTables = " 'IONameMatch' = nvram;" " }" #endif /* PPC */ -#ifdef i386 -" ," -" {" -" 'IOClass' = AppleIntelClock;" -" 'IOProviderClass' = IOPlatformDevice;" -" 'IONameMatch' = intel-clock;" -" }" -#endif /* i386 */ ")"; diff --git a/iokit/bsddev/DINetBootHook.cpp b/iokit/bsddev/DINetBootHook.cpp index 9ba7b5b91..d93e326c1 100644 --- a/iokit/bsddev/DINetBootHook.cpp +++ b/iokit/bsddev/DINetBootHook.cpp @@ -8,10 +8,16 @@ * Revision History * * $Log: DINetBootHook.cpp,v $ + * Revision 1.3.1558.1 2005/06/24 01:47:25 lindak + * Bringing over all of the Karma changes into chardonnay. 
+ * + * Revision 1.1.1.1 2005/02/24 21:48:06 akosut + * Import xnu-764 from Tiger8A395 + * * Revision 1.3 2002/06/16 20:36:02 lindak * Merged PR-2957314 into Jaguar (siegmund: netboot kernel code needs to set * com.apple.AppleDiskImageController.load to boolean Yes) - * + * * Revision 1.2.40.2 2002/06/15 03:50:38 dieter * - corrected com.apple.AppleDiskImageController.load string * diff --git a/iokit/bsddev/DINetBootHook.h b/iokit/bsddev/DINetBootHook.h index ed1f54381..8f2ffe411 100644 --- a/iokit/bsddev/DINetBootHook.h +++ b/iokit/bsddev/DINetBootHook.h @@ -8,13 +8,19 @@ * Revision History * * $Log: DINetBootHook.h,v $ + * Revision 1.3.1582.1 2005/06/24 01:47:25 lindak + * Bringing over all of the Karma changes into chardonnay. + * + * Revision 1.1.1.1 2005/02/24 21:48:06 akosut + * Import xnu-764 from Tiger8A395 + * * Revision 1.3 2002/05/22 18:50:49 aramesh * Kernel API Cleanup * Bug #: 2853781 * Changes from Josh(networking), Rick(IOKit), Jim & David(osfmk), Umesh, Dan & Ramesh(BSD) * Submitted by: Ramesh * Reviewed by: Vincent - * + * * Revision 1.2.12.1 2002/05/21 23:08:14 aramesh * Kernel API Cleanup * Bug #: 2853781 diff --git a/iokit/bsddev/IOKitBSDInit.cpp b/iokit/bsddev/IOKitBSDInit.cpp index d93930dd0..1d9f9af62 100644 --- a/iokit/bsddev/IOKitBSDInit.cpp +++ b/iokit/bsddev/IOKitBSDInit.cpp @@ -465,6 +465,14 @@ kern_return_t IOFindBSDRoot( char * rootName, do { if( (regEntry = IORegistryEntry::fromPath( "/chosen", gIODTPlane ))) { + data = OSDynamicCast(OSData, regEntry->getProperty( "root-matching" )); + if (data) { + matching = OSDynamicCast(OSDictionary, OSUnserializeXML((char *)data->getBytesNoCopy())); + if (matching) { + continue; + } + } + data = (OSData *) regEntry->getProperty( "boot-uuid" ); if( data) { uuidStr = (const char*)data->getBytesNoCopy(); @@ -564,10 +572,12 @@ kern_return_t IOFindBSDRoot( char * rootName, // from OpenFirmware path IOLog("From path: \"%s\", ", look); - if( forceNet || (0 == strncmp( look, "enet", strlen( "enet" ))) ) 
{ - matching = IONetworkMatching( look, str, kMaxPathBuf ); - } else { - matching = IODiskMatching( look, str, kMaxPathBuf ); + if (!matching) { + if( forceNet || (0 == strncmp( look, "enet", strlen( "enet" ))) ) { + matching = IONetworkMatching( look, str, kMaxPathBuf ); + } else { + matching = IODiskMatching( look, str, kMaxPathBuf ); + } } } @@ -669,7 +679,7 @@ kern_return_t IOFindBSDRoot( char * rootName, IOService * subservice = IOFindMatchingChild( service ); if ( subservice ) service = subservice; } else if ( service && mediaProperty ) { - service = service->getProperty(mediaProperty); + service = (IOService *)service->getProperty(mediaProperty); } major = 0; diff --git a/iokit/conf/Makefile.i386 b/iokit/conf/Makefile.i386 index 1f8be4145..029888c26 100644 --- a/iokit/conf/Makefile.i386 +++ b/iokit/conf/Makefile.i386 @@ -10,18 +10,33 @@ CWARNFLAGS= $(filter-out -Wbad-function-cast, $(CWARNFLAGS_STD)) OBJS_NO_WERROR= \ ioconf.o \ UNDRequest.o \ - IOLib.o \ + IOLib.cpo \ IOStringFuncs.o \ + IOCatalogue.cpo \ IOCPU.cpo \ + IOCommandGate.cpo \ IOCommandPool.cpo \ IOCommandQueue.cpo \ - IOKitBSDInit.cpo \ + IOConditionLock.cpo \ + IOFilterInterruptEventSource.cpo \ + IOHibernateIO.cpo \ IOInterruptController.cpo \ IOInterruptEventSource.cpo \ + IOKitBSDInit.cpo \ + IOMapper.cpo \ + IOMemoryCursor.cpo \ + IOMemoryDescriptor.cpo \ + IOPlatformExpert.cpo \ IOPMPowerStateQueue.cpo \ IOPMchangeNoteList.cpo \ IOPMrootDomain.cpo \ + IORangeAllocator.cpo \ + IORegistryEntry.cpo \ + IOService.cpo \ IOServicePM.cpo \ + IOSyncer.cpo \ + IOTimerEventSource.cpo \ + IOUserClient.cpo \ IOWorkLoop.cpo \ RootDomainUserClient.cpo diff --git a/iokit/conf/files b/iokit/conf/files index ab719eb34..149ca7deb 100644 --- a/iokit/conf/files +++ b/iokit/conf/files @@ -11,7 +11,7 @@ iokit/Kernel/WKdmDecompress.c optional iokitcpp iokit/Kernel/IOHibernateIO.cpp optional iokitcpp iokit/Kernel/IOHibernateRestoreKernel.c optional iokitcpp -iokit/Kernel/IOLib.c optional iokitcpp 
+iokit/Kernel/IOLib.cpp optional iokitcpp iokit/Kernel/IOLocks.cpp optional iokitcpp iokit/Kernel/IOConditionLock.cpp optional iokitcpp iokit/Kernel/IOSyncer.cpp optional iokitcpp @@ -42,11 +42,14 @@ iokit/Kernel/IOCommandQueue.cpp optional iokitcpp iokit/Kernel/IOFilterInterruptEventSource.cpp optional iokitcpp iokit/Kernel/IOTimerEventSource.cpp optional iokitcpp +# Memory system +iokit/Kernel/IOBufferMemoryDescriptor.cpp optional iokitcpp +iokit/Kernel/IODMACommand.cpp optional iokitcpp iokit/Kernel/IODeviceMemory.cpp optional iokitcpp iokit/Kernel/IOMapper.cpp optional iokitcpp -iokit/Kernel/IOMemoryDescriptor.cpp optional iokitcpp +iokit/Kernel/IOCopyMapper.cpp optional iokitcpp iokit/Kernel/IOMemoryCursor.cpp optional iokitcpp -iokit/Kernel/IOBufferMemoryDescriptor.cpp optional iokitcpp +iokit/Kernel/IOMemoryDescriptor.cpp optional iokitcpp iokit/Kernel/IOMultiMemoryDescriptor.cpp optional iokitcpp iokit/Kernel/IORangeAllocator.cpp optional iokitcpp diff --git a/iokit/conf/files.i386 b/iokit/conf/files.i386 index a604778e1..2193ae37a 100644 --- a/iokit/conf/files.i386 +++ b/iokit/conf/files.i386 @@ -4,8 +4,5 @@ iokit/Kernel/i386/IOSharedLock.s standard iokit/Kernel/i386/IOAsmSupport.s standard -# Real Time Clock hack -iokit/Drivers/platform/drvAppleIntelClock/IntelClock.cpp optional iokitcpp - # Power Domains iokit/Kernel/IOPMrootDomain.cpp optional iokitcpp diff --git a/iokit/include/architecture/i386/pio.h b/iokit/include/architecture/i386/pio.h deleted file mode 100644 index 408aac2bc..000000000 --- a/iokit/include/architecture/i386/pio.h +++ /dev/null @@ -1,246 +0,0 @@ -/* - * Copyright (c) 1998-2000 Apple Computer, Inc. All rights reserved. - * - * @APPLE_LICENSE_HEADER_START@ - * - * The contents of this file constitute Original Code as defined in and - * are subject to the Apple Public Source License Version 1.1 (the - * "License"). You may not use this file except in compliance with the - * License. 
Please obtain a copy of the License at - * http://www.apple.com/publicsource and read it before using this file. - * - * This Original Code and all software distributed under the License are - * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the - * License for the specific language governing rights and limitations - * under the License. - * - * @APPLE_LICENSE_HEADER_END@ - */ -/* - * @OSF_COPYRIGHT@ - */ -/* - * HISTORY - * - * Revision 1.1.2.1 1998/10/13 00:40:44 ehewitt - * Added support for Intel. - * - * Revision 1.1.1.1 1998/09/22 21:05:37 wsanchez - * Import of Mac OS X kernel (~semeria) - * - * Revision 1.1.1.1 1998/03/07 02:25:38 wsanchez - * Import of OSF Mach kernel (~mburg) - * - * Revision 1.1.8.2 1996/07/31 09:46:36 paire - * Merged with nmk20b7_shared (1.1.11.2 -> 1.1.11.1) - * [96/06/10 paire] - * - * Revision 1.1.11.2 1996/06/13 12:38:25 bernadat - * Do not use inline macros when MACH_ASSERT is configured. - * [96/05/24 bernadat] - * - * Revision 1.1.11.1 1996/05/14 13:50:23 paire - * Added new linl and loutl __inline__. - * Added conditional compilation for [l]{in|oub}[bwl]() __inline__. - * [95/11/24 paire] - * - * Revision 1.1.8.1 1994/09/23 02:00:28 ezf - * change marker to not FREE - * [1994/09/22 21:25:52 ezf] - * - * Revision 1.1.4.5 1993/08/09 19:40:41 dswartz - * Add ANSI prototypes - CR#9523 - * [1993/08/06 17:45:57 dswartz] - * - * Revision 1.1.4.4 1993/06/11 15:17:37 jeffc - * CR9176 - ANSI C violations: inb/outb macros must be changed from - * ({ ... }) to inline functions, with proper type definitions. Callers - * must pass proper types to these functions: 386 I/O port addresses - * are unsigned shorts (not pointers). 
- * [1993/06/10 14:26:10 jeffc] - * - * Revision 1.1.4.3 1993/06/07 22:09:28 jeffc - * CR9176 - ANSI C violations: trailing tokens on CPP - * directives, extra semicolons after decl_ ..., asm keywords - * [1993/06/07 19:00:26 jeffc] - * - * Revision 1.1.4.2 1993/06/04 15:28:45 jeffc - * CR9176 - ANSI problems - - * Added casts to get macros to take caddr_t as an I/O space address. - * [1993/06/04 13:45:55 jeffc] - * - * Revision 1.1 1992/09/30 02:25:51 robert - * Initial revision - * - * $EndLog$ - */ -/* CMU_HIST */ -/* - * Revision 2.5 91/05/14 16:14:20 mrt - * Correcting copyright - * - * Revision 2.4 91/02/05 17:13:56 mrt - * Changed to new Mach copyright - * [91/02/01 17:37:08 mrt] - * - * Revision 2.3 90/12/20 16:36:37 jeffreyh - * changes for __STDC__ - * [90/12/07 jeffreyh] - * - * Revision 2.2 90/11/26 14:48:41 rvb - * Pulled from 2.5 - * [90/11/22 10:09:38 rvb] - * - * [90/08/14 mg32] - * - * Now we know how types are factor in. - * Cleaned up a bunch: eliminated ({ for output and flushed unused - * output variables. - * [90/08/14 rvb] - * - * This is how its done in gcc: - * Created. - * [90/03/26 rvb] - * - */ -/* CMU_ENDHIST */ -/* - * Mach Operating System - * Copyright (c) 1991,1990 Carnegie Mellon University - * All Rights Reserved. - * - * Permission to use, copy, modify and distribute this software and its - * documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR - * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. 
- * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie Mellon - * the rights to redistribute these changes. - */ -/* - */ -#ifndef I386_PIO_H -#define I386_PIO_H -//#include -//#include -#define MACH_ASSERT 0 - -typedef unsigned short i386_ioport_t; - -/* read a longword */ -extern unsigned long inl( - i386_ioport_t port); -/* read a shortword */ -extern unsigned short inw( - i386_ioport_t port); -/* read a byte */ -extern unsigned char inb( - i386_ioport_t port); -/* write a longword */ -extern void outl( - i386_ioport_t port, - unsigned long datum); -/* write a word */ -extern void outw( - i386_ioport_t port, - unsigned short datum); -/* write a longword */ -extern void outb( - i386_ioport_t port, - unsigned char datum); - -/* input an array of longwords */ -extern void linl( - i386_ioport_t port, - int * data, - int count); -/* output an array of longwords */ -extern void loutl( - i386_ioport_t port, - int * data, - int count); - -/* input an array of words */ -extern void linw( - i386_ioport_t port, - int * data, - int count); -/* output an array of words */ -extern void loutw( - i386_ioport_t port, - int * data, - int count); - -/* input an array of bytes */ -extern void linb( - i386_ioport_t port, - char * data, - int count); -/* output an array of bytes */ -extern void loutb( - i386_ioport_t port, - char * data, - int count); - -#if defined(__GNUC__) && (!MACH_ASSERT) -extern __inline__ unsigned long inl( - i386_ioport_t port) -{ - unsigned long datum; - __asm__ volatile("inl %1, %0" : "=a" (datum) : "d" (port)); - return(datum); -} - -extern __inline__ unsigned short inw( - i386_ioport_t port) -{ - unsigned short datum; - __asm__ volatile(".byte 0x66; inl %1, %0" : "=a" (datum) : "d" (port)); - 
return(datum); -} - -extern __inline__ unsigned char inb( - i386_ioport_t port) -{ - unsigned char datum; - __asm__ volatile("inb %1, %0" : "=a" (datum) : "d" (port)); - return(datum); -} - -extern __inline__ void outl( - i386_ioport_t port, - unsigned long datum) -{ - __asm__ volatile("outl %0, %1" : : "a" (datum), "d" (port)); -} - -extern __inline__ void outw( - i386_ioport_t port, - unsigned short datum) -{ - __asm__ volatile(".byte 0x66; outl %0, %1" : : "a" (datum), "d" (port)); -} - -extern __inline__ void outb( - i386_ioport_t port, - unsigned char datum) -{ - __asm__ volatile("outb %0, %1" : : "a" (datum), "d" (port)); -} -#endif /* defined(__GNUC__) && (!MACH_ASSERT) */ -#endif /* I386_PIO_H */ diff --git a/kgmacros b/kgmacros index 2356ffe9e..d1a666c74 100644 --- a/kgmacros +++ b/kgmacros @@ -6,26 +6,8 @@ # # All the convenience variables used by these macros begin with $kgm_ -define showversion -#Display version string, a pointer to which is pinned at 0x501C in the kernel's -#low memory globals - p (char *) *0x501c -end - -document showversion -Syntax: showversion -| Read the kernel version string from a fixed address in low -| memory. Useful if you don't know which kernel is on the other end, -| and need to find the appropriate symbols. Beware that if you've -| loaded a symbol file, but aren't connected to a remote target, -| the version string from the symbol file will be displayed instead. -| This macro expects to be connected to the remote kernel to function -| correctly. -end - -set $kgm_dummy = &proc0 -set $kgm_dummy = &kmod -set $kgm_mtype = ((struct mach_header)_mh_execute_header).cputype +set print asm-demangle on +set cp-abi gnu-v2 echo Loading Kernel GDB Macros package. 
Type "help kgm" for more info.\n @@ -54,7 +36,13 @@ document kgm | showallipc Display a summary listing of all the ipc spaces | showallrights Display a summary listing of all the ipc rights | showallkmods Display a summary listing of all the kernel modules -| showallclasses Display info about all OSObject subclasses in the system +| +| showallclasses Display info about all OSObject subclasses in the system +| showobject Show info about an OSObject - its vtable ptr and retain count, & more info for simple container classes. +| showregistry Show info about all registry entries in the current plane +| showregistryprops Show info about all registry entries in the current plane, and their properties +| showregistryentry Show info about a registry entry; its properties and descendants in the current plane +| setregistryplane Set the plane to be used for the iokit registry macros (pass zero for list) | | showtask Display info about the specified task | showtaskthreads Display info about the threads in the task @@ -104,13 +92,45 @@ document kgm | disablecore Configure the kernel to disable coredump transmission | switchtocorethread Corefile version of "switchtoact" | resetcorectx Corefile version of "resetctx" -| +| +| readphys Reads the specified untranslated address +| readphys64 Reads the specified untranslated 64-bit address +| | kdp-reboot Restart remote target | | Type "help " for more specific help on a particular macro. | Type "show user " to see what the macro is really doing. end +# This macro should appear before any symbol references, to facilitate +# a gdb "source" without a loaded symbol file. +define showversion + printf "%s\n", *(char **)0x501C +end + +document showversion +Syntax: showversion +| Read the kernel version string from a fixed address in low +| memory. Useful if you don't know which kernel is on the other end, +| and need to find the appropriate symbols. 
Beware that if you've +| loaded a symbol file, but aren't connected to a remote target, +| the version string from the symbol file will be displayed instead. +| This macro expects to be connected to the remote kernel to function +| correctly. +end + +set $kgm_dummy = &proc0 +set $kgm_dummy = &kmod +set $kgm_mtype = ((struct mach_header)_mh_execute_header).cputype + +set $kgm_reg_depth = 0 +set $kgm_reg_plane = (void **) gIOServicePlane +set $kgm_namekey = (OSSymbol *) 0 +set $kgm_childkey = (OSSymbol *) 0 + +set $kgm_show_object_addrs = 0 +set $kgm_show_object_retain = 0 +set $kgm_show_props = 0 define showkmodheader printf "kmod address size " @@ -135,7 +155,7 @@ set $kgm_fkmodmax = 0xffffffff set $kgm_pkmod = 0 set $kgm_pkmodst = 0 set $kgm_pkmoden = 0 -define showkmodaddr +define showkmodaddrint printf "0x%x" , $arg0 if ((unsigned int)$arg0 >= (unsigned int)$kgm_pkmodst) && ((unsigned int)$arg0 <= (unsigned int)$kgm_pkmoden) set $kgm_off = ((unsigned int)$arg0 - (unsigned int)$kgm_pkmodst) @@ -149,7 +169,7 @@ define showkmodaddr set $kgm_kmodmin = $kgm_kmod.address end if ($kgm_kmod.address + $kgm_kmod.size) > $kgm_kmodmax - set $kgm_kmodmax = $kgm_kmod.address + set $kgm_kmodmax = $kgm_kmod.address + $kgm_kmod.size end set $kgm_off = ((unsigned int)$arg0 - (unsigned int)$kgm_kmod.address) if ($kgm_kmod.address <= $arg0) && ($kgm_off <= $kgm_kmod.size) @@ -169,6 +189,12 @@ define showkmodaddr end end end + +define showkmodaddr + showkmodaddrint $arg0 + printf "\n" +end + document showkmodaddr | Given an address, print the offset and name for the kmod containing it | The following is the syntax: @@ -235,10 +261,10 @@ define showactint if $kgm_state & 0x01 printf "W\t" printf "0x%08x ", $kgm_thread.wait_queue - - if ((unsigned)$kgm_thread.wait_event > (unsigned)sectPRELINKB) - showkmodaddr $kgm_thread.wait_event - else + if (((unsigned)$kgm_thread.wait_event > (unsigned)sectPRELINKB) \ + && ($arg1 != 2)) + showkmodaddr $kgm_thread.wait_event + else output /a 
(unsigned) $kgm_thread.wait_event end end @@ -251,37 +277,45 @@ define showactint if ($kgm_mtype == 18) set $mysp = $kgm_thread.machine.pcb->save_r1 else - set $kgm_statep = (struct i386_kernel_state *) \ + set $kgm_statep = (struct x86_kernel_state32 *) \ ($kgm_thread->kernel_stack + 0x4000 \ - - sizeof(struct i386_kernel_state)) + - sizeof(struct x86_kernel_state32)) set $mysp = $kgm_statep->k_ebp end - set $prevsp = 0 + set $prevsp = $mysp - 16 printf "\n\t\tstacktop=0x%08x", $mysp if ($kgm_mtype == 18) set $stkmask = 0xf - set $stklimit = 0xb0000000 else set $stkmask = 0x3 - set $stklimit = 0xfc000000 end + set $kgm_return = 0 while ($mysp != 0) && (($mysp & $stkmask) == 0) \ - && ($mysp < $stklimit) \ - && ((unsigned)$mysp > (unsigned)$prevsp) + && ($mysp != $prevsp) \ + && ((((unsigned) $mysp ^ (unsigned) $prevsp) < 0x2000) \ + || (((unsigned)$mysp < ((unsigned) ($kgm_thread->kernel_stack+0x4000))) \ + && ((unsigned)$mysp > (unsigned) ($kgm_thread->kernel_stack)))) + + if ((unsigned) $kgm_return > (unsigned) sectPRELINKB) + showkmodaddr $kgm_return + else + if ((unsigned) $kgm_return > 0) + output /a (unsigned) $kgm_return + end + end printf "\n\t\t0x%08x ", $mysp if ($kgm_mtype == 18) set $kgm_return = *($mysp + 8) else set $kgm_return = *($mysp + 4) end - if ((unsigned) $kgm_return > (unsigned) sectPRELINKB) - showkmodaddr $kgm_return - else - output /a (unsigned) $kgm_return - end set $prevsp = $mysp set $mysp = * $mysp end + if ((unsigned) $kgm_return > 0) + output/a $kgm_return + end + set $kgm_return = 0 printf "\n\t\tstackbottom=0x%08x", $prevsp else printf "\n\t\t\tcontinuation=" @@ -358,6 +392,7 @@ document showcurrentthreads | (gdb) showcurrentthreads end +set $decode_wait_events = 0 define showallstacks set $kgm_head_taskp = &default_pset.tasks set $kgm_taskp = (struct task *)($kgm_head_taskp->next) @@ -368,17 +403,25 @@ define showallstacks set $kgm_actp = (struct thread *)($kgm_taskp->threads.next) while $kgm_actp != $kgm_head_actp showactheader 
- showactint $kgm_actp 1 + if ($decode_wait_events > 0) + showactint $kgm_actp 1 + else + showactint $kgm_actp 2 + end set $kgm_actp = (struct thread *)($kgm_actp->task_threads.next) end printf "\n" set $kgm_taskp = (struct task *)($kgm_taskp->pset_tasks.next) end end + document showallstacks | Routine to print out the stack for each thread in the system. | The following is the syntax: | (gdb) showallstacks +| If the variable $decode_wait_events is non-zero, the routine attempts to +| interpret thread wait_events as kernel module offsets, which can add to +| processing time. end define showcurrentstacks @@ -395,6 +438,7 @@ set $kgm_prp = processor_list set $kgm_prp = ($kgm_prp)->processor_list end end + document showcurrentstacks | Routine to print out the thread running on each cpu (incl. its stack) | The following is the syntax: @@ -1426,25 +1470,44 @@ end set $kdp_act_counter = 0 define switchtoact + set $newact = (struct thread *) $arg0 + if ($newact->kernel_stack == 0) + echo This activation does not have a stack.\n + echo continuation: + output/a (unsigned) $newact.continuation + echo \n + else if ($kgm_mtype == 18) if ($kdp_act_counter == 0) - set $kdpstate = (struct savearea *) kdp.saved_state + set $kdpstate = (struct savearea *) kdp.saved_state end set $kdp_act_counter = $kdp_act_counter + 1 set $newact = (struct thread *) $arg0 - if ($newact->kernel_stack == 0) - echo This activation does not have a stack.\n - echo continuation: - output/a (unsigned) $newact.continuation - echo \n - else - set (struct savearea *) kdp.saved_state=$newact->machine->pcb - flush - set $pc=$newact->machine->pcb.save_srr0 - update - end + set (struct savearea *) kdp.saved_state=$newact->machine->pcb + flushregs + flushstack + set $pc=$newact->machine->pcb.save_srr0 + update else - echo switchtoact not implemented for this architecture.\n + set $kdpstatep = (struct x86_saved_state32 *) kdp.saved_state + if ($kdp_act_counter == 0) + set $kdpstate = *($kdpstatep) + end + set 
$kdp_act_counter = $kdp_act_counter + 1 + + set $kgm_statep = (struct x86_kernel_state32 *) \ + ($newact->kernel_stack + 0x4000 \ + - sizeof(struct x86_kernel_state32)) + set $kdpstatep->ebx = $kgm_statep->k_ebx + set $kdpstatep->ebp = $kgm_statep->k_ebp + set $kdpstatep->edi = $kgm_statep->k_edi + set $kdpstatep->esi = $kgm_statep->k_esi + set $kdpstatep->eip = $kgm_statep->k_eip + flushregs + flushstack + set $pc = $kgm_statep->k_eip + update + end end end @@ -1464,7 +1527,8 @@ define switchtoctx end set $kdp_act_counter = $kdp_act_counter + 1 set (struct savearea *) kdp.saved_state=(struct savearea *) $arg0 - flush + flushregs + flushstack set $pc=((struct savearea *) $arg0)->save_srr0 update else @@ -1483,12 +1547,19 @@ end define resetctx if ($kgm_mtype == 18) set (struct savearea *)kdp.saved_state=$kdpstate - flush + flushregs + flushstack set $pc=((struct savearea *) kdp.saved_state)->save_srr0 update set $kdp_act_counter = 0 else - echo resetctx not implemented for this architecture.\n + set $kdpstatep = (struct x86_saved_state32 *) kdp.saved_state + set *($kdpstatep)=$kdpstate + flushregs + flushstack + set $pc=$kdpstatep->eip + update + set $kdp_act_counter = 0 end end @@ -1600,7 +1671,8 @@ end define _kgm_flush_loop set $kgm_flush_loop_ctr = 0 while ($kgm_flush_loop_ctr < 30) - flush + flushregs + flushstack set $kgm_flush_loop_ctr = $kgm_flush_loop_ctr + 1 end end @@ -1613,6 +1685,41 @@ define _kgm_update_loop end end +#This is necessary since gdb often doesn't do backtraces on x86 correctly +#in the absence of symbols. The code below in showuserstack and +#showx86backtrace also contains several workarounds for the gdb bug where +#gdb stops macro evaluation because of spurious "Cannot read memory" +#errors on x86. These errors appear on ppc as well, but they don't +#always stop macro evaluation. 
+ +set $kgm_cur_ebp = 0 +set $kgm_cur_eip = 0 + +define showx86backtrace + if ($kgm_cur_ebp == 0) + set $kgm_cur_ebp = $ebp + end + if ($kgm_cur_eip == 0) + set $kgm_cur_eip = $eip + end + printf "0: EBP: 0x%08x EIP: 0x%08x\n", $kgm_cur_ebp, $kgm_cur_eip + x/i $kgm_cur_eip + set $kgm_prev_ebp = *((uint32_t *) $kgm_cur_ebp) + set $kgm_prev_eip = *((uint32_t *) ($kgm_cur_ebp + 4)) + set $kgm_frameno = 1 + while $kgm_prev_ebp != 0 + printf "%d: saved EBP: 0x%08x saved EIP: 0x%08x\n", $kgm_frameno, $kgm_prev_ebp, $kgm_prev_eip + x/i $kgm_prev_eip + set $kgm_cur_ebp = $kgm_prev_ebp + set $kgm_prev_ebp = *((uint32_t *) $kgm_cur_ebp) + set $kgm_prev_eip = *((uint32_t *) ($kgm_cur_ebp + 4)) + set $kgm_frameno = $kgm_frameno + 1 + end + set $kgm_cur_ebp = 0 + set $kgm_cur_eip = 0 + set kdp_pmap = 0 +end + define showuserstack if ($kgm_mtype == 18) if ($kdp_act_counter == 0) @@ -1644,10 +1751,22 @@ define showuserstack _kgm_update_loop end else - echo showuserstack not implemented for this architecture.\n - end + set $newact = (struct thread *) $arg0 + set $newiss = (x86_saved_state32_t *) ($newact->machine.pcb->iss) + set $checkpc = $newiss.eip + if ($checkpc == 0) + echo This activation does not appear to have + echo \20 a valid user context.\n + else + set $kgm_cur_ebp = $newiss.ebp + set $kgm_cur_eip = $checkpc + printf "You may now issue the showx86backtrace command to see the user space backtrace for this thread (0x%08x); you can also examine memory locations in this address space (pmap 0x%08x) before issuing the backtrace. This two-step process is necessary to work around various bugs in x86 gdb, which cause it to stop memory evaluation on spurious memory read errors. 
Additionally, you may need to issue a set kdp_pmap = 0 command after the showx86backtrace completes, to resume reading from the kernel address space.\n", $arg0, $newact->task->map->pmap + set kdp_pmap = $newact->task->map->pmap + _kgm_flush_loop + _kgm_update_loop + end + end end - document showuserstack Syntax: showuserstack
|This command displays a numeric backtrace for the user space stack of @@ -1843,9 +1962,9 @@ define showgdbthread if ($kgm_mtype == 18) set $mysp = $kgm_thread.machine.pcb->save_r1 else - set $kgm_statep = (struct i386_kernel_state *) \ + set $kgm_statep = (struct x86_kernel_state32 *) \ ($kgm_thread->kernel_stack + 0x4000 \ - - sizeof(struct i386_kernel_state)) + - sizeof(struct x86_kernel_state32)) set $mysp = $kgm_statep->k_ebp end set $prevsp = 0 @@ -1955,22 +2074,429 @@ Syntax: switchtouserthread
end define showmetaclass - set cp-abi gnu-v2 set $kgm_metaclassp = (OSMetaClass *)$arg0 printf "%-5d", $kgm_metaclassp->instanceCount printf "x %5d bytes", $kgm_metaclassp->classSize printf " %s\n", $kgm_metaclassp->className->string end -define showallclasses - set cp-abi gnu-v2 +define showstring + printf "\"%s\"", ((OSString *)$arg0)->string +end + +define shownumber + printf "%lld", ((OSNumber *)$arg0)->value +end + +define showboolean + if ($arg0 == gOSBooleanFalse) + printf "No" + else + printf "Yes" + end +end + +define showdata + set $kgm_data = (OSData *)$arg0 + + printf "<" + set $kgm_datap = (const unsigned char *) $kgm_data->data + + set $kgm_printstr = 0 + if (0 == (3 & (unsigned int)$kgm_datap) && ($kgm_data->length >= 3)) + set $kgm_bytes = *(unsigned int *) $kgm_datap + if (0xffff0000 & $kgm_bytes) + set $kgm_idx = 0 + set $kgm_printstr = 1 + while ($kgm_idx++ < 4) + set $kgm_bytes = $kgm_bytes >> 8 + set $kgm_char = 0xff & $kgm_bytes + if ($kgm_char && (($kgm_char < 0x20) || ($kgm_char > 0x7e))) + set $kgm_printstr = 0 + end + end + end + end + + set $kgm_idx = 0 + if ($kgm_printstr) + set $kgm_quoted = 0 + while ($kgm_idx < $kgm_data->length) + set $kgm_char = $kgm_datap[$kgm_idx++] + if ($kgm_char) + if (0 == $kgm_quoted) + set $kgm_quoted = 1 + if ($kgm_idx > 1) + printf ",\"" + else + printf "\"" + end + end + printf "%c", $kgm_char + else + if ($kgm_quoted) + set $kgm_quoted = 0 + printf "\"" + end + end + end + if ($kgm_quoted) + printf "\"" + end + else + if (0 == (3 & (unsigned int)$kgm_datap)) + while (($kgm_idx + 4) <= $kgm_data->length) + printf "%08x", *(unsigned int *) &$kgm_datap[$kgm_idx] + set $kgm_idx = $kgm_idx + 4 + end + end + while ($kgm_idx < $kgm_data->length) + printf "%02x", $kgm_datap[$kgm_idx++] + end + end + printf ">" +end + +define showdictionaryint + set $kgm$arg0_dict = (OSDictionary *)$arg1 + + printf "{" + set $kgm$arg0_idx = 0 + while ($kgm$arg0_idx < $kgm$arg0_dict->count) + set $kgm_obj = 
$kgm$arg0_dict->dictionary[$kgm$arg0_idx].key + showobjectint _$arg0 $kgm_obj + printf "=" + set $kgm_obj = $kgm$arg0_dict->dictionary[$kgm$arg0_idx++].value + showobjectint _$arg0 $kgm_obj + if ($kgm$arg0_idx < $kgm$arg0_dict->count) + printf "," + end + end + printf "}" +end + +define indent + set $kgm_idx = 0 + while ($kgm_idx < $arg0) + if ($arg1 & (1 << $kgm_idx++)) + printf "| " + else + printf " " + end + end +end + +define showregdictionary + indent $kgm_reg_depth+2 $arg1 + printf "{\n" + + set $kgm_reg_idx = 0 + while ($kgm_reg_idx < $arg0->count) + indent $kgm_reg_depth+2 $arg1 + printf " " + set $kgm_obj = $arg0->dictionary[$kgm_reg_idx].key + showobjectint _ $kgm_obj + printf " = " + + set $kgm_obj = $arg0->dictionary[$kgm_reg_idx++].value + showobjectint _ $kgm_obj + printf "\n" + end + indent $kgm_reg_depth+2 $arg1 + printf "}\n" +end + + +define showarraysetint + set $kgm$arg0_array = (OSArray *)$arg1 + + set $kgm$arg0_idx = 0 + while ($kgm$arg0_idx < $kgm$arg0_array->count) + set $kgm_obj = $kgm$arg0_array->array[$kgm$arg0_idx++] + showobjectint _$arg0 $kgm_obj + if ($kgm$arg0_idx < $kgm$arg0_array->count) + printf "," + end + end +end + +define showarrayint + printf "(" + showarraysetint $arg0 $arg1 + printf ")" +end + +define showsetint + set $kgm_array = ((OSSet *)$arg1)->members + printf "[" + showarraysetint $arg0 $kgm_array + printf "]" +end + + +define showobjectint + set $kgm_obj = (OSObject *) $arg1 + set $kgm_vt = *((void **) $arg1) + + if ($kgm_show_object_addrs) + printf "`object %p, vt ", $arg1 + output /a (unsigned) $kgm_vt + if ($kgm_show_object_retain) + printf ", retain count %d, container retain %d", (0xffff & $kgm_obj->retainCount), $kgm_obj->retainCount >> 16 + end + printf "` " + end + + if ($kgm_vt == _ZTV8OSString) + showstring $arg1 + else + if ($kgm_vt == _ZTV8OSSymbol) + showstring $arg1 + else + if ($kgm_vt == _ZTV8OSNumber) + shownumber $arg1 + else + if ($kgm_vt == _ZTV6OSData) + showdata $arg1 + else + if ($kgm_vt == 
_ZTV9OSBoolean) + showboolean $arg1 + else + if ($kgm_vt == _ZTV12OSDictionary) + showdictionaryint _$arg0 $arg1 + else + if ($kgm_vt == _ZTV7OSArray) + showarrayint _$arg0 $arg1 + else + if ($kgm_vt == _ZTV5OSSet) + showsetint _$arg0 $arg1 + else + if ($kgm_show_object_addrs == 0) + printf "`object %p, vt ", $arg1 + output /a (unsigned) $kgm_vt + printf "`" + end + end + end + end + end + end + end + end + end +end + +define showobject + set $kgm_save = $kgm_show_object_addrs + set $kgm_show_object_addrs = 1 + set $kgm_show_object_retain = 1 + showobjectint _ $arg0 + set $kgm_show_object_addrs = $kgm_save + set $kgm_show_object_retain = 0 + printf "\n" +end +document showobject +| Show info about an OSObject - its vtable ptr and retain count. +| If the object is a simple container class, more info will be shown. +| The following is the syntax: +| (gdb) showobject +end + +define dictget + set $kgm_dictp = (OSDictionary *)$arg0 + set $kgm_keyp = (const OSSymbol *)$arg1 + set $kgm_idx = 0 + set $kgm_result = 0 + while (($kgm_idx < $kgm_dictp->count) && ($kgm_result == 0)) + if ($kgm_keyp == $kgm_dictp->dictionary[$kgm_idx].key) + set $kgm_result = $kgm_dictp->dictionary[$kgm_idx].value + end + set $kgm_idx = $kgm_idx + 1 + end +end + + +define showregistryentryrecurse + set $kgm_re = (IOService *)$arg1 + set $kgm$arg0_stack = (unsigned long long) $arg2 + + if ($arg3) + set $kgm$arg0_stack = $kgm$arg0_stack | (1ULL << $kgm_reg_depth) + else + set $kgm$arg0_stack = $kgm$arg0_stack & ~(1ULL << $kgm_reg_depth) + end + + dictget $kgm_re->fRegistryTable $kgm_childkey + set $kgm$arg0_child_array = (OSArray *) $kgm_result + + if ($kgm$arg0_child_array) + set $kgm$arg0_child_count = $kgm$arg0_child_array->count + else + set $kgm$arg0_child_count = 0 + end + + if ($kgm$arg0_child_count) + set $kgm$arg0_stack = $kgm$arg0_stack | (2ULL << $kgm_reg_depth) + else + set $kgm$arg0_stack = $kgm$arg0_stack & ~(2ULL << $kgm_reg_depth) + end + + indent $kgm_reg_depth $kgm$arg0_stack + 
printf "+-o " + + dictget $kgm_re->fRegistryTable $kgm_namekey + if ($kgm_result == 0) + dictget $kgm_re->fRegistryTable gIONameKey + end + if ($kgm_result == 0) + dictget $kgm_re->fPropertyTable gIOClassKey + end + + if ($kgm_result != 0) + printf "%s", ((OSString *)$kgm_result)->string + else + if (((IOService*)$kgm_re)->pm_vars && ((IOService*)$kgm_re)->pm_vars->ourName) + printf "%s", ((IOService*)$kgm_re)->pm_vars->ourName + else +# printf ", guessclass " +# guessclass $kgm_re + printf "??" + end + end + + + printf " __state[0] + # kIOServiceRegisteredState + if (0 == ($kgm_state & 2)) + printf "!" + end + printf "registered, " + # kIOServiceMatchedState + if (0 == ($kgm_state & 4)) + printf "!" + end + printf "matched, " + # kIOServiceInactiveState + if ($kgm_state & 1) + printf "in" + end + printf "active, busy %d, retain count %d", (0xff & $kgm_re->__state[1]), (0xffff & $kgm_re->retainCount) + end + printf ">\n" + + if ($kgm_show_props) + set $kgm_props = $kgm_re->fPropertyTable + showregdictionary $kgm_props $kgm$arg0_stack + end + + # recurse + if ($kgm$arg0_child_count != 0) + + set $kgm_reg_depth = $kgm_reg_depth + 1 + set $kgm$arg0_child_idx = 0 + + while ($kgm$arg0_child_idx < $kgm$arg0_child_count) + set $kgm_re = $kgm$arg0_child_array->array[$kgm$arg0_child_idx++] + set $kgm_more_sib = ($kgm$arg0_child_idx < $kgm$arg0_child_count) + showregistryentryrecurse _$arg0 $kgm_re $kgm$arg0_stack $kgm_more_sib + end + + set $kgm_reg_depth = $kgm_reg_depth - 1 + end +end + +define showregistryentryint + set $kgm_namekey = (OSSymbol *) $kgm_reg_plane[2] + set $kgm_childkey = (OSSymbol *) $kgm_reg_plane[4] + + showregistryentryrecurse _ $arg0 0 0 +end + +define showregistry + set $kgm_reg_depth = 0 + set $kgm_show_props = 0 + showregistryentryint gRegistryRoot +end +document showregistry +| Show info about all registry entries in the current plane. 
+| The following is the syntax: +| (gdb) showregistry +end + +define showregistryprops + set $kgm_reg_depth = 0 + set $kgm_show_props = 1 + showregistryentryint gRegistryRoot +end +document showregistryprops +| Show info about all registry entries in the current plane, and their properties. +| set $kgm_show_object_addrs = 1 and/or set $kgm_show_object_retain = 1 will display +| more verbose information +| The following is the syntax: +| (gdb) showregistryprops +end + +define showregistryentry + set $kgm_reg_depth = 0 + set $kgm_show_props = 1 + showregistryentryint $arg0 +end +document showregistryentry +| Show info about a registry entry; its properties and descendants in the current plane. +| The following is the syntax: +| (gdb) showregistryentry +end + +define setregistryplane + if ($arg0) + set $kgm_reg_plane = (void **) $arg0 + else + showobjectint _ gIORegistryPlanes + printf "\n" + end +end +document setregistryplane +| Set the plane to be used for the iokit registry macros. An argument of zero will +| display known planes. +| The following is the syntax: +| (gdb) setregistryplane +end + +define guessclass set $kgm_classidx = 0 + set $kgm_lookvt = *((void **) $arg0) + set $kgm_bestvt = (void *) 0 + set $kgm_bestidx = 0 + while $kgm_classidx < sAllClassesDict->count set $kgm_meta = (OSMetaClass *) sAllClassesDict->dictionary[$kgm_classidx].value - showmetaclass $kgm_meta + + set $kgm_vt = *((void **) $kgm_meta) + + if (($kgm_vt > $kgm_bestvt) && ($kgm_vt < $kgm_lookvt)) + set $kgm_bestvt = $kgm_vt + set $kgm_bestidx = $kgm_classidx + end set $kgm_classidx = $kgm_classidx + 1 end + printf "%s", sAllClassesDict->dictionary[$kgm_bestidx].key->string end + +define showallclasses + set $kgm_classidx = 0 + while $kgm_classidx < sAllClassesDict->count + set $kgm_meta = (OSMetaClass *) sAllClassesDict->dictionary[$kgm_classidx++].value + showmetaclass $kgm_meta + end +end + document showallclasses | Show the instance counts and ivar size of all OSObject subclasses. 
See ioclasscount man page for details. | The following is the syntax: @@ -1989,3 +2515,31 @@ document showioalloc | The following is the syntax: | (gdb) showioalloc end + +define readphys + set kdp_trans_off = 1 + x/x $arg0 + set kdp_trans_off = 0 +end + +define readphys64 + if ($kgm_mtype == 18) + set kdp_src_high32 = ((uint32_t) ($arg0)) >> 32 + x/x (uint32_t) (($arg0) & 0x00000000ffffffffUL) + set kdp_src_high32 = 0 + else + echo readphys64 not available on this architecture.\n + end +end + +document readphys +| The argument is interpreted as a physical address, and the word addressed is +| displayed. While this fails if no physical page exists at the given address, +| it must be used with caution. +end + +document readphys64 +| The argument is interpreted as a 64-bit physical address, and the word +| addressed is displayed. While this fails if no physical page exists at the +| given address, it must be used with caution. +end diff --git a/libkern/Makefile b/libkern/Makefile index c9ed0dba2..eca7e6882 100644 --- a/libkern/Makefile +++ b/libkern/Makefile @@ -4,9 +4,9 @@ export MakeInc_rule=${SRCROOT}/makedefs/MakeInc.rule export MakeInc_dir=${SRCROOT}/makedefs/MakeInc.dir -export COMP_LDFLAGS_COMPONENT_PPC = -i_OSCompareAndSwap:_hw_compare_and_store \ - -i_OSDequeueAtomic:_hw_dequeue_atomic \ - -i_OSEnqueueAtomic:_hw_queue_atomic +export COMP_LDFLAGS_COMPONENT_PPC = -Wl,-i_OSCompareAndSwap:_hw_compare_and_store \ + -Wl,-i_OSDequeueAtomic:_hw_dequeue_atomic \ + -Wl,-i_OSEnqueueAtomic:_hw_queue_atomic include $(MakeInc_cmd) include $(MakeInc_def) diff --git a/libkern/c++/OSCollectionIterator.cpp b/libkern/c++/OSCollectionIterator.cpp index ca0a03f06..8302a2d22 100644 --- a/libkern/c++/OSCollectionIterator.cpp +++ b/libkern/c++/OSCollectionIterator.cpp @@ -70,7 +70,7 @@ OSCollectionIterator::withCollection(const OSCollection *inColl) void OSCollectionIterator::free() { if (collIterator) { - kfree((vm_offset_t)collIterator, collection->iteratorSize()); + 
kfree(collIterator, collection->iteratorSize()); ACCUMSIZE(-(collection->iteratorSize())); collIterator = 0; } diff --git a/libkern/c++/OSData.cpp b/libkern/c++/OSData.cpp index f921adc02..4804c8798 100644 --- a/libkern/c++/OSData.cpp +++ b/libkern/c++/OSData.cpp @@ -57,7 +57,7 @@ bool OSData::initWithCapacity(unsigned int inCapacity) if (data && (!inCapacity || capacity < inCapacity) ) { // clean out old data's storage if it isn't big enough - kfree((vm_address_t) data, capacity); + kfree(data, capacity); data = 0; ACCUMSIZE(-capacity); } @@ -182,7 +182,7 @@ OSData *OSData::withData(const OSData *inData, void OSData::free() { if (capacity != EXTERNAL && data && capacity) { - kfree((vm_offset_t)data, capacity); + kfree(data, capacity); ACCUMSIZE( -capacity ); } super::free(); @@ -217,7 +217,7 @@ unsigned int OSData::ensureCapacity(unsigned int newCapacity) bzero(newData + capacity, newCapacity - capacity); if (data) { bcopy(data, newData, capacity); - kfree((vm_offset_t)data, capacity); + kfree(data, capacity); } ACCUMSIZE( newCapacity - capacity ); data = (void *) newData; diff --git a/libkern/c++/OSDictionary.cpp b/libkern/c++/OSDictionary.cpp index 4796f039d..eb177a56d 100644 --- a/libkern/c++/OSDictionary.cpp +++ b/libkern/c++/OSDictionary.cpp @@ -234,7 +234,7 @@ void OSDictionary::free() (void) super::setOptions(0, kImmutable); flushCollection(); if (dictionary) { - kfree((vm_offset_t)dictionary, capacity * sizeof(dictEntry)); + kfree(dictionary, capacity * sizeof(dictEntry)); ACCUMSIZE( -(capacity * sizeof(dictEntry)) ); } @@ -277,7 +277,7 @@ unsigned int OSDictionary::ensureCapacity(unsigned int newCapacity) bzero(&newDict[capacity], newSize - oldSize); ACCUMSIZE(newSize - oldSize); - kfree((vm_offset_t)dictionary, oldSize); + kfree(dictionary, oldSize); dictionary = newDict; capacity = newCapacity; diff --git a/libkern/c++/OSMetaClass.cpp b/libkern/c++/OSMetaClass.cpp index 4e3a92ab5..b2303e84d 100644 --- a/libkern/c++/OSMetaClass.cpp +++ 
b/libkern/c++/OSMetaClass.cpp @@ -38,6 +38,9 @@ #include #include +#include +#include + __BEGIN_DECLS #include @@ -70,7 +73,7 @@ static const int kClassCapacityIncrement = 40; static const int kKModCapacityIncrement = 10; static OSDictionary *sAllClassesDict, *sKModClassesDict, *sSortedByClassesDict; -static mutex_t *loadLock; +static mutex_t *loadLock = 0; static struct StalledData { const char *kmodName; OSReturn result; @@ -80,6 +83,8 @@ static struct StalledData { } *sStalled; static unsigned int sConsiderUnloadDelay = 60; /* secs */ +static bool unloadsEnabled = true; // set to false when system going to sleep +static thread_call_t unloadCallout = 0; static const char OSMetaClassBasePanicMsg[] = "OSMetaClassBase::_RESERVEDOSMetaClassBase%d called\n"; @@ -272,7 +277,7 @@ OSMetaClass::OSMetaClass(const char *inClassName, sStalled->capacity += kKModCapacityIncrement; memmove(sStalled->classes, oldStalled, oldSize); - kfree((vm_offset_t)oldStalled, oldSize); + kfree(oldStalled, oldSize); ACCUMSIZE(newSize - oldSize); } @@ -356,7 +361,7 @@ void *OSMetaClass::preModLoad(const char *kmodName) sStalled->classes = (OSMetaClass **) kalloc(kKModCapacityIncrement * sizeof(OSMetaClass *)); if (!sStalled->classes) { - kfree((vm_offset_t) sStalled, sizeof(*sStalled)); + kfree(sStalled, sizeof(*sStalled)); return 0; } ACCUMSIZE((kKModCapacityIncrement * sizeof(OSMetaClass *)) + sizeof(*sStalled)); @@ -408,7 +413,7 @@ OSReturn OSMetaClass::postModLoad(void *loadHandle) case kCompletedBootstrap: { unsigned int i; - myname = OSSymbol::withCStringNoCopy(sStalled->kmodName); + myname = (OSSymbol *)OSSymbol::withCStringNoCopy(sStalled->kmodName); if (!sStalled->count) break; // Nothing to do so just get out @@ -465,9 +470,9 @@ OSReturn OSMetaClass::postModLoad(void *loadHandle) if (sStalled) { ACCUMSIZE(-(sStalled->capacity * sizeof(OSMetaClass *) + sizeof(*sStalled))); - kfree((vm_offset_t) sStalled->classes, + kfree(sStalled->classes, sStalled->capacity * sizeof(OSMetaClass *)); 
- kfree((vm_offset_t) sStalled, sizeof(*sStalled)); + kfree(sStalled, sizeof(*sStalled)); sStalled = 0; } @@ -558,6 +563,34 @@ void OSMetaClass::reportModInstances(const char *kmodName) iter->release(); } + +extern "C" { + +IOReturn OSMetaClassSystemSleepOrWake(UInt32 messageType) +{ + mutex_lock(loadLock); + + /* If the system is going to sleep, cancel the reaper thread timer + * and mark unloads disabled in case it just fired but hasn't + * taken the lock yet. If we are coming back from sleep, just + * set unloads enabled; IOService's normal operation will cause + * unloads to be considered soon enough. + */ + if (messageType == kIOMessageSystemWillSleep) { + if (unloadCallout) { + thread_call_cancel(unloadCallout); + } + unloadsEnabled = false; + } else if (messageType == kIOMessageSystemHasPoweredOn) { + unloadsEnabled = true; + } + mutex_unlock(loadLock); + + return kIOReturnSuccess; +} + +}; + extern "C" kern_return_t kmod_unload_cache(void); static void _OSMetaClassConsiderUnloads(thread_call_param_t p0, @@ -574,6 +607,11 @@ static void _OSMetaClassConsiderUnloads(thread_call_param_t p0, mutex_lock(loadLock); + if (!unloadsEnabled) { + mutex_unlock(loadLock); + return; + } + do { kmods = OSCollectionIterator::withCollection(sKModClassesDict); @@ -584,7 +622,7 @@ static void _OSMetaClassConsiderUnloads(thread_call_param_t p0, while ( (kmodName = (OSSymbol *) kmods->getNextObject()) ) { if (ki) { - kfree((vm_offset_t) ki, sizeof(kmod_info_t)); + kfree(ki, sizeof(kmod_info_t)); ki = 0; } @@ -626,7 +664,6 @@ static void _OSMetaClassConsiderUnloads(thread_call_param_t p0, void OSMetaClass::considerUnloads() { - static thread_call_t unloadCallout; AbsoluteTime when; mutex_lock(loadLock); @@ -780,7 +817,7 @@ const OSMetaClass *OSMetaClass::getSuperClass() const const OSSymbol *OSMetaClass::getKmodName() const { - return sSortedByClassesDict->getObject((const OSSymbol *)this); + return (const OSSymbol *)sSortedByClassesDict->getObject((OSSymbol *)this); } unsigned 
int OSMetaClass::getInstanceCount() const diff --git a/libkern/c++/OSObject.cpp b/libkern/c++/OSObject.cpp index 5923d3cb5..503b9504d 100644 --- a/libkern/c++/OSObject.cpp +++ b/libkern/c++/OSObject.cpp @@ -75,6 +75,8 @@ OSMetaClassDefineReservedUnused(OSObject, 12); OSMetaClassDefineReservedUnused(OSObject, 13); OSMetaClassDefineReservedUnused(OSObject, 14); OSMetaClassDefineReservedUnused(OSObject, 15); + +#ifdef __ppc__ OSMetaClassDefineReservedUnused(OSObject, 16); OSMetaClassDefineReservedUnused(OSObject, 17); OSMetaClassDefineReservedUnused(OSObject, 18); @@ -91,6 +93,7 @@ OSMetaClassDefineReservedUnused(OSObject, 28); OSMetaClassDefineReservedUnused(OSObject, 29); OSMetaClassDefineReservedUnused(OSObject, 30); OSMetaClassDefineReservedUnused(OSObject, 31); +#endif static const char *getClassName(const OSObject *obj) { @@ -271,7 +274,7 @@ void *OSObject::operator new(size_t size) void OSObject::operator delete(void *mem, size_t size) { - kfree((vm_offset_t) mem, size); + kfree(mem, size); ACCUMSIZE(-size); } diff --git a/libkern/c++/OSOrderedSet.cpp b/libkern/c++/OSOrderedSet.cpp index 593ebc577..34b07fbce 100644 --- a/libkern/c++/OSOrderedSet.cpp +++ b/libkern/c++/OSOrderedSet.cpp @@ -99,7 +99,7 @@ void OSOrderedSet::free() flushCollection(); if (array) { - kfree((vm_offset_t)array, sizeof(_Element) * capacity); + kfree(array, sizeof(_Element) * capacity); ACCUMSIZE( -(sizeof(_Element) * capacity) ); } @@ -137,7 +137,7 @@ unsigned int OSOrderedSet::ensureCapacity(unsigned int newCapacity) bcopy(array, newArray, oldSize); bzero(&newArray[capacity], newSize - oldSize); - kfree((vm_offset_t)array, oldSize); + kfree(array, oldSize); array = newArray; capacity = newCapacity; } diff --git a/libkern/c++/OSString.cpp b/libkern/c++/OSString.cpp index d28d15783..fa7ed45ae 100644 --- a/libkern/c++/OSString.cpp +++ b/libkern/c++/OSString.cpp @@ -158,7 +158,7 @@ OSString *OSString::stringWithFormat(const char *format, ...) 
void OSString::free() { if ( !(flags & kOSStringNoCopy) && string) { - kfree((vm_offset_t)string, (vm_size_t)length); + kfree(string, (vm_size_t)length); ACCUMSIZE(-length); } diff --git a/libkern/c++/OSSymbol.cpp b/libkern/c++/OSSymbol.cpp index 7c5307fb3..039c9eec4 100644 --- a/libkern/c++/OSSymbol.cpp +++ b/libkern/c++/OSSymbol.cpp @@ -113,7 +113,7 @@ void * OSSymbolPool::operator new(size_t size) void OSSymbolPool::operator delete(void *mem, size_t size) { - kfree((vm_offset_t)mem, size); + kfree(mem, size); ACCUMSIZE(-size); } @@ -145,12 +145,12 @@ OSSymbolPool::OSSymbolPool(const OSSymbolPool *old) OSSymbolPool::~OSSymbolPool() { if (buckets) { - kfree((vm_offset_t)buckets, nBuckets * sizeof(Bucket)); + kfree(buckets, nBuckets * sizeof(Bucket)); ACCUMSIZE(-(nBuckets * sizeof(Bucket))); } if (poolGate) - kfree((vm_offset_t) poolGate, 36 * 4); + kfree(poolGate, 36 * 4); } unsigned long OSSymbolPool::log2(unsigned int x) @@ -295,7 +295,7 @@ OSSymbol *OSSymbolPool::insertSymbol(OSSymbol *sym) /* @@@ gvdl: Zero test and panic if can't set up pool */ list[0] = sym; bcopy(thisBucket->symbolP, list + 1, j * sizeof(OSSymbol *)); - kfree((vm_offset_t)thisBucket->symbolP, j * sizeof(OSSymbol *)); + kfree(thisBucket->symbolP, j * sizeof(OSSymbol *)); ACCUMSIZE(-(j * sizeof(OSSymbol *))); thisBucket->symbolP = list; if (count > nBuckets) @@ -334,7 +334,7 @@ void OSSymbolPool::removeSymbol(OSSymbol *sym) probeSymbol = list[0]; if (probeSymbol == sym) { thisBucket->symbolP = (OSSymbol **) list[1]; - kfree((vm_offset_t)list, 2 * sizeof(OSSymbol *)); + kfree(list, 2 * sizeof(OSSymbol *)); ACCUMSIZE(-(2 * sizeof(OSSymbol *))); count--; thisBucket->count--; @@ -344,7 +344,7 @@ void OSSymbolPool::removeSymbol(OSSymbol *sym) probeSymbol = list[1]; if (probeSymbol == sym) { thisBucket->symbolP = (OSSymbol **) list[0]; - kfree((vm_offset_t)list, 2 * sizeof(OSSymbol *)); + kfree(list, 2 * sizeof(OSSymbol *)); ACCUMSIZE(-(2 * sizeof(OSSymbol *))); count--; thisBucket->count--; @@ 
-367,7 +367,7 @@ void OSSymbolPool::removeSymbol(OSSymbol *sym) bcopy(thisBucket->symbolP + thisBucket->count-j, list + thisBucket->count-1-j, j * sizeof(OSSymbol *)); - kfree((vm_offset_t)thisBucket->symbolP, thisBucket->count * sizeof(OSSymbol *)); + kfree(thisBucket->symbolP, thisBucket->count * sizeof(OSSymbol *)); ACCUMSIZE(-(thisBucket->count * sizeof(OSSymbol *))); thisBucket->symbolP = list; count--; diff --git a/libkern/c++/OSUnserializeXML.cpp b/libkern/c++/OSUnserializeXML.cpp index d77055edc..068e91f61 100644 --- a/libkern/c++/OSUnserializeXML.cpp +++ b/libkern/c++/OSUnserializeXML.cpp @@ -81,7 +81,7 @@ #define YYSTYPE object_t * #define YYPARSE_PARAM state -#define YYLEX_PARAM state +#define YYLEX_PARAM (parser_state_t *)state // this is the internal struct used to hold objects on parser stack // it represents objects both before and after they have been created @@ -147,7 +147,7 @@ extern unsigned long strtoul(const char *, char **, int); #define malloc(s) kern_os_malloc(s) #define realloc(a, s) kern_os_realloc(a, s) -#define free(a) kern_os_free(a) +#define free(a) kern_os_free((void *)a) #ifndef YYSTYPE #define YYSTYPE int @@ -933,7 +933,7 @@ case 16: case 17: #line 192 "OSUnserializeXML.y" { yyval = yyvsp[-1]; - yyval->key = yyval->object; + yyval->key = (OSString *)yyval->object; yyval->object = yyvsp[0]->object; yyval->next = NULL; yyvsp[0]->object = 0; diff --git a/libkern/c++/OSUnserializeXML.y b/libkern/c++/OSUnserializeXML.y index f8f1be578..33a18ef73 100644 --- a/libkern/c++/OSUnserializeXML.y +++ b/libkern/c++/OSUnserializeXML.y @@ -60,7 +60,7 @@ #define YYSTYPE object_t * #define YYPARSE_PARAM state -#define YYLEX_PARAM state +#define YYLEX_PARAM (parser_state_t *)state // this is the internal struct used to hold objects on parser stack // it represents objects both before and after they have been created @@ -126,7 +126,7 @@ extern unsigned long strtoul(const char *, char **, int); #define malloc(s) kern_os_malloc(s) #define realloc(a, s) 
kern_os_realloc(a, s) -#define free(a) kern_os_free(a) +#define free(a) kern_os_free((void *)a) %} %token ARRAY @@ -190,7 +190,7 @@ pairs: pair ; pair: key object { $$ = $1; - $$->key = $$->object; + $$->key = (OSString *)$$->object; $$->object = $2->object; $$->next = NULL; $2->object = 0; diff --git a/libkern/conf/Makefile.i386 b/libkern/conf/Makefile.i386 index fa98396d8..712240adf 100644 --- a/libkern/conf/Makefile.i386 +++ b/libkern/conf/Makefile.i386 @@ -25,6 +25,9 @@ OBJS_NO_WERROR= \ OSString.cpo \ OSSymbol.cpo \ OSUnserialize.cpo \ + OSIterator.cpo \ + OSSet.cpo \ + scanf.o \ OSUnserializeXML.cpo OBJS_WERROR=$(filter-out $(OBJS_NO_WERROR),$(OBJS)) diff --git a/libkern/gen/OSAtomicOperations.c b/libkern/gen/OSAtomicOperations.c index 4a1e7beb0..f924b7a45 100644 --- a/libkern/gen/OSAtomicOperations.c +++ b/libkern/gen/OSAtomicOperations.c @@ -28,7 +28,6 @@ enum { }; #define NULL 0L - /* * atomic operations * these are _the_ atomic operations, currently cast atop CompareAndSwap, @@ -71,6 +70,7 @@ SInt32 OSDecrementAtomic(SInt32 * value) return OSAddAtomic(-1, value); } +#ifdef CMPXCHG8B void * OSDequeueAtomic(void ** inList, SInt32 inOffset) { void * oldListHead; @@ -85,7 +85,6 @@ void * OSDequeueAtomic(void ** inList, SInt32 inOffset) newListHead = *(void **) (((char *) oldListHead) + inOffset); } while (! OSCompareAndSwap((UInt32)oldListHead, (UInt32)newListHead, (UInt32 *)inList)); - return oldListHead; } @@ -101,7 +100,7 @@ void OSEnqueueAtomic(void ** inList, void * inNewLink, SInt32 inOffset) } while (! 
OSCompareAndSwap((UInt32)oldListHead, (UInt32)newListHead, (UInt32 *)inList)); } - +#endif /* CMPXCHG8B */ #endif /* !__ppc__ */ static UInt32 OSBitwiseAtomic(UInt32 and_mask, UInt32 or_mask, UInt32 xor_mask, UInt32 * value) diff --git a/libkern/gen/OSDebug.cpp b/libkern/gen/OSDebug.cpp index 64a752091..d8a8c2842 100644 --- a/libkern/gen/OSDebug.cpp +++ b/libkern/gen/OSDebug.cpp @@ -31,6 +31,7 @@ #include #include // From bsd's libkern directory +#include __BEGIN_DECLS // From osmfk/kern/thread.h but considered to be private @@ -39,6 +40,8 @@ extern vm_offset_t max_valid_stack_address(void); // From osfmk/kmod.c extern void kmod_dump_log(vm_offset_t *addr, unsigned int cnt); + +extern addr64_t kvtophys(vm_offset_t va); __END_DECLS static mutex_t *sOSReportLock = mutex_alloc(0); @@ -71,6 +74,39 @@ OSReportWithBacktrace(const char *str, ...) static vm_offset_t minstackaddr = min_valid_stack_address(); static vm_offset_t maxstackaddr = max_valid_stack_address(); +#if __i386__ +#define i386_RETURN_OFFSET 4 + +static unsigned int +i386_validate_stackptr(vm_offset_t stackptr) +{ + /* Existence and alignment check + */ + if (!stackptr || (stackptr & 0x3)) + return 0; + + /* Is a virtual->physical translation present? + */ + if (!kvtophys(stackptr)) + return 0; + + /* Check if the return address lies on the same page; + * If not, verify that a translation exists. 
+ */ + if (((PAGE_SIZE - (stackptr & PAGE_MASK)) < i386_RETURN_OFFSET) && + !kvtophys(stackptr + i386_RETURN_OFFSET)) + return 0; + return 1; +} + +static unsigned int +i386_validate_raddr(vm_offset_t raddr) +{ + return ((raddr > VM_MIN_KERNEL_ADDRESS) && + (raddr < VM_MAX_KERNEL_ADDRESS)); +} +#endif + unsigned OSBacktrace(void **bt, unsigned maxAddrs) { unsigned frame; @@ -104,67 +140,50 @@ unsigned OSBacktrace(void **bt, unsigned maxAddrs) for ( ; i < maxAddrs; i++) bt[i] = (void *) 0; -#elif 0 && __i386__ // Note that this should be ported for i386 - // This function is not safe, we should get this code ported appropriately - if (maxAddrs > 16) { - for (frame = 16; frame < maxAddrs; frame++) - bt[frame] = __builtin_return_address(frame); - maxAddrs = 16; - } +#elif __i386__ +#define SANE_i386_FRAME_SIZE 8*1024 + vm_offset_t stackptr, stackptr_prev, raddr; + unsigned frame_index = 0; +/* Obtain current frame pointer */ + __asm__ volatile("movl %%ebp, %0" : "=m" (stackptr)); - switch(maxAddrs) { - case 15+1: bt[15] = __builtin_return_address(15); - case 14+1: bt[14] = __builtin_return_address(14); - case 13+1: bt[13] = __builtin_return_address(13); - case 12+1: bt[12] = __builtin_return_address(12); - case 11+1: bt[11] = __builtin_return_address(11); - case 10+1: bt[10] = __builtin_return_address(10); - case 9+1: bt[ 9] = __builtin_return_address( 9); - case 8+1: bt[ 8] = __builtin_return_address( 8); - case 7+1: bt[ 7] = __builtin_return_address( 7); - case 6+1: bt[ 6] = __builtin_return_address( 6); - case 5+1: bt[ 5] = __builtin_return_address( 5); - case 4+1: bt[ 4] = __builtin_return_address( 4); - case 3+1: bt[ 3] = __builtin_return_address( 3); - case 2+1: bt[ 2] = __builtin_return_address( 2); - case 1+1: bt[ 1] = __builtin_return_address( 1); - case 0+1: bt[ 0] = __builtin_return_address( 0); - case 0: default: break; - } + if (!i386_validate_stackptr(stackptr)) + goto pad; - frame = maxAddrs; -#else - // This function is not safe, we should get this 
code ported appropriately - if (maxAddrs > 16) { - for (frame = 16; frame < maxAddrs; frame++) - bt[frame] = 0; - maxAddrs = 16; - } + raddr = *((vm_offset_t *) (stackptr + i386_RETURN_OFFSET)); + + if (!i386_validate_raddr(raddr)) + goto pad; + + bt[frame_index++] = (void *) raddr; + + for ( ; frame_index < maxAddrs; frame_index++) { + stackptr_prev = stackptr; + stackptr = *((vm_offset_t *) stackptr_prev); + + if (!i386_validate_stackptr(stackptr)) + break; + /* Stack grows downwards */ + if (stackptr < stackptr_prev) + break; - switch (maxAddrs) { - case 15+1: bt[15] = __builtin_return_address(15); - case 14+1: bt[14] = __builtin_return_address(14); - case 13+1: bt[13] = __builtin_return_address(13); - case 12+1: bt[12] = __builtin_return_address(12); - case 11+1: bt[11] = __builtin_return_address(11); - case 10+1: bt[10] = __builtin_return_address(10); - case 9+1: bt[ 9] = __builtin_return_address( 9); - case 8+1: bt[ 8] = __builtin_return_address( 8); - case 7+1: bt[ 7] = __builtin_return_address( 7); - case 6+1: bt[ 6] = __builtin_return_address( 6); - case 5+1: bt[ 5] = __builtin_return_address( 5); - case 4+1: bt[ 4] = __builtin_return_address( 4); - case 3+1: bt[ 3] = __builtin_return_address( 3); - case 2+1: bt[ 2] = __builtin_return_address( 2); - case 1+1: bt[ 1] = __builtin_return_address( 1); - case 0+1: bt[ 0] = __builtin_return_address( 0); - case 0: - default : - break; + if ((stackptr_prev ^ stackptr) > SANE_i386_FRAME_SIZE) + break; + + raddr = *((vm_offset_t *) (stackptr + i386_RETURN_OFFSET)); + + if (!i386_validate_raddr(raddr)) + break; + + bt[frame_index] = (void *) raddr; } +pad: + frame = frame_index; - frame = maxAddrs; + for ( ; frame_index < maxAddrs; frame_index++) + bt[frame_index] = (void *) 0; +#else +#error arch #endif - return frame; } diff --git a/libkern/libkern/OSAtomic.h b/libkern/libkern/OSAtomic.h index 2b839fe77..1870272b5 100644 --- a/libkern/libkern/OSAtomic.h +++ b/libkern/libkern/OSAtomic.h @@ -261,6 +261,7 @@ extern 
Boolean OSTestAndSet(UInt32 bit, UInt8 * startAddress); extern Boolean OSTestAndClear(UInt32 bit, UInt8 * startAddress); +#ifdef __ppc__ /*! @function OSEnqueueAtomic @abstract Singly linked list head insertion, performed atomically with respect to all devices that participate in the coherency architecture of the platform. @discussion The OSEnqueueAtomic function places an element at the head of a single linked list, which is specified with the address of a head pointer, listHead. The element structure has a next field whose offset is specified. @@ -284,6 +285,7 @@ extern void OSEnqueueAtomic(void ** listHead, void * element, extern void * OSDequeueAtomic(void ** listHead, SInt32 elementNextFieldOffset); +#endif /* __ppc__ */ /*! @function OSSynchronizeIO @abstract The OSSynchronizeIO routine ensures orderly load and store operations to noncached memory mapped I/O devices. diff --git a/libkern/libkern/OSAtomic.h.save b/libkern/libkern/OSAtomic.h.save index 2b839fe77..1870272b5 100644 --- a/libkern/libkern/OSAtomic.h.save +++ b/libkern/libkern/OSAtomic.h.save @@ -261,6 +261,7 @@ extern Boolean OSTestAndSet(UInt32 bit, UInt8 * startAddress); extern Boolean OSTestAndClear(UInt32 bit, UInt8 * startAddress); +#ifdef __ppc__ /*! @function OSEnqueueAtomic @abstract Singly linked list head insertion, performed atomically with respect to all devices that participate in the coherency architecture of the platform. @discussion The OSEnqueueAtomic function places an element at the head of a single linked list, which is specified with the address of a head pointer, listHead. The element structure has a next field whose offset is specified. @@ -284,6 +285,7 @@ extern void OSEnqueueAtomic(void ** listHead, void * element, extern void * OSDequeueAtomic(void ** listHead, SInt32 elementNextFieldOffset); +#endif /* __ppc__ */ /*! @function OSSynchronizeIO @abstract The OSSynchronizeIO routine ensures orderly load and store operations to noncached memory mapped I/O devices. 
diff --git a/libkern/libkern/OSByteOrder.h b/libkern/libkern/OSByteOrder.h index c64a3aa05..f9b3a6a83 100644 --- a/libkern/libkern/OSByteOrder.h +++ b/libkern/libkern/OSByteOrder.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2003 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -19,26 +19,64 @@ * * @APPLE_LICENSE_HEADER_END@ */ -/* - * Copyright (c) 1999 Apple Computer, Inc. All rights reserved. - * - * HISTORY - * - */ #ifndef _OS_OSBYTEORDER_H #define _OS_OSBYTEORDER_H #include -#if defined(__GNUC__) && defined(__ppc__) +/* Macros for swapping constant values in the preprocessing stage. */ +#define OSSwapConstInt16(x) \ + ((uint16_t)((((uint16_t)(x) & 0xff00) >> 8) | \ + (((uint16_t)(x) & 0x00ff) << 8))) + +#define OSSwapConstInt32(x) \ + ((uint32_t)((((uint32_t)(x) & 0xff000000) >> 24) | \ + (((uint32_t)(x) & 0x00ff0000) >> 8) | \ + (((uint32_t)(x) & 0x0000ff00) << 8) | \ + (((uint32_t)(x) & 0x000000ff) << 24))) + +#define OSSwapConstInt64(x) \ + ((uint64_t)((((uint64_t)(x) & 0xff00000000000000ULL) >> 56) | \ + (((uint64_t)(x) & 0x00ff000000000000ULL) >> 40) | \ + (((uint64_t)(x) & 0x0000ff0000000000ULL) >> 24) | \ + (((uint64_t)(x) & 0x000000ff00000000ULL) >> 8) | \ + (((uint64_t)(x) & 0x00000000ff000000ULL) << 8) | \ + (((uint64_t)(x) & 0x0000000000ff0000ULL) << 24) | \ + (((uint64_t)(x) & 0x000000000000ff00ULL) << 40) | \ + (((uint64_t)(x) & 0x00000000000000ffULL) << 56))) + +#if defined(__GNUC__) + +#if (defined(__ppc__) || defined(__ppc64__)) #include -#elif defined(__GNUC__) && defined(__i386__) +#elif (defined(__i386__) || defined(__x86_64__)) #include #else #include #endif +#define OSSwapInt16(x) \ + (__builtin_constant_p(x) ? OSSwapConstInt16(x) : _OSSwapInt16(x)) + +#define OSSwapInt32(x) \ + (__builtin_constant_p(x) ? OSSwapConstInt32(x) : _OSSwapInt32(x)) + +#define OSSwapInt64(x) \ + (__builtin_constant_p(x) ? 
OSSwapConstInt64(x) : _OSSwapInt64(x)) + +#else /* ! __GNUC__ */ + +#include + +#define OSSwapInt16(x) OSSwapConstInt16(x) + +#define OSSwapInt32(x) OSSwapConstInt32(x) + +#define OSSwapInt64(x) OSSwapConstInt64(x) + +#endif /* __GNUC__ */ + enum { OSUnknownByteOrder, OSLittleEndian, @@ -57,37 +95,6 @@ OSHostByteOrder(void) { #endif } -/* Macros for swapping constant values in the preprocessing stage. */ -#define OSSwapConstInt16(x) ((((uint16_t)(x) & 0xff00) >> 8) | \ - (((uint16_t)(x) & 0x00ff) << 8)) - -#define OSSwapConstInt32(x) ((((uint32_t)(x) & 0xff000000) >> 24) | \ - (((uint32_t)(x) & 0x00ff0000) >> 8) | \ - (((uint32_t)(x) & 0x0000ff00) << 8) | \ - (((uint32_t)(x) & 0x000000ff) << 24)) - -#define OSSwapConstInt64(x) ((((uint64_t)(x) & 0xff00000000000000ULL) >> 56) | \ - (((uint64_t)(x) & 0x00ff000000000000ULL) >> 40) | \ - (((uint64_t)(x) & 0x0000ff0000000000ULL) >> 24) | \ - (((uint64_t)(x) & 0x000000ff00000000ULL) >> 8) | \ - (((uint64_t)(x) & 0x00000000ff000000ULL) << 8) | \ - (((uint64_t)(x) & 0x0000000000ff0000ULL) << 24) | \ - (((uint64_t)(x) & 0x000000000000ff00ULL) << 40) | \ - (((uint64_t)(x) & 0x00000000000000ffULL) << 56)) - -#if !defined(__GNUC__) -#define __builtin_constant_p(x) (0) -#endif - -#define OSSwapInt16(x) \ - (__builtin_constant_p(x) ? OSSwapConstInt16(x) : _OSSwapInt16(x)) - -#define OSSwapInt32(x) \ - (__builtin_constant_p(x) ? OSSwapConstInt32(x) : _OSSwapInt32(x)) - -#define OSSwapInt64(x) \ - (__builtin_constant_p(x) ? OSSwapConstInt64(x) : _OSSwapInt64(x)) - #define OSReadBigInt(x, y) OSReadBigInt32(x, y) #define OSWriteBigInt(x, y, z) OSWriteBigInt32(x, y, z) #define OSSwapBigToHostInt(x) OSSwapBigToHostInt32(x) @@ -97,141 +104,98 @@ OSHostByteOrder(void) { #define OSSwapHostToLittleInt(x) OSSwapHostToLittleInt32(x) #define OSSwapLittleToHostInt(x) OSSwapLittleToHostInt32(x) -#if defined(__BIG_ENDIAN__) - -/* Functions for loading big endian to host endianess. */ +/* Functions for loading native endian values. 
*/ OS_INLINE uint16_t -OSReadBigInt16( +_OSReadInt16( const volatile void * base, - uintptr_t offset + uintptr_t byteOffset ) { - return *(volatile uint16_t *)((uintptr_t)base + offset); + return *(volatile uint16_t *)((uintptr_t)base + byteOffset); } OS_INLINE uint32_t -OSReadBigInt32( +_OSReadInt32( const volatile void * base, - uintptr_t offset + uintptr_t byteOffset ) { - return *(volatile uint32_t *)((uintptr_t)base + offset); + return *(volatile uint32_t *)((uintptr_t)base + byteOffset); } OS_INLINE uint64_t -OSReadBigInt64( +_OSReadInt64( const volatile void * base, - uintptr_t offset + uintptr_t byteOffset ) { - return *(volatile uint64_t *)((uintptr_t)base + offset); + return *(volatile uint64_t *)((uintptr_t)base + byteOffset); } -/* Functions for storing host endianess to big endian. */ +/* Functions for storing native endian values. */ OS_INLINE void -OSWriteBigInt16( +_OSWriteInt16( volatile void * base, - uintptr_t offset, + uintptr_t byteOffset, uint16_t data ) { - *(volatile uint16_t *)((uintptr_t)base + offset) = data; + *(volatile uint16_t *)((uintptr_t)base + byteOffset) = data; } OS_INLINE void -OSWriteBigInt32( +_OSWriteInt32( volatile void * base, - uintptr_t offset, + uintptr_t byteOffset, uint32_t data ) { - *(volatile uint32_t *)((uintptr_t)base + offset) = data; + *(volatile uint32_t *)((uintptr_t)base + byteOffset) = data; } OS_INLINE void -OSWriteBigInt64( +_OSWriteInt64( volatile void * base, - uintptr_t offset, + uintptr_t byteOffset, uint64_t data ) { - *(volatile uint64_t *)((uintptr_t)base + offset) = data; + *(volatile uint64_t *)((uintptr_t)base + byteOffset) = data; } -/* Functions for loading little endian to host endianess. */ +#if defined(__BIG_ENDIAN__) -OS_INLINE -uint16_t -OSReadLittleInt16( - volatile void * base, - uintptr_t offset -) -{ - return OSReadSwapInt16(base, offset); -} +/* Functions for loading big endian to host endianess. 
*/ -OS_INLINE -uint32_t -OSReadLittleInt32( - volatile void * base, - uintptr_t offset -) -{ - return OSReadSwapInt32(base, offset); -} +#define OSReadBigInt16(base, byteOffset) _OSReadInt16(base, byteOffset) +#define OSReadBigInt32(base, byteOffset) _OSReadInt32(base, byteOffset) +#define OSReadBigInt64(base, byteOffset) _OSReadInt64(base, byteOffset) -OS_INLINE -uint64_t -OSReadLittleInt64( - volatile void * base, - uintptr_t offset -) -{ - return OSReadSwapInt64(base, offset); -} +/* Functions for storing host endianess to big endian. */ -/* Functions for storing host endianess to little endian. */ +#define OSWriteBigInt16(base, byteOffset, data) _OSWriteInt16(base, byteOffset, data) +#define OSWriteBigInt32(base, byteOffset, data) _OSWriteInt32(base, byteOffset, data) +#define OSWriteBigInt64(base, byteOffset, data) _OSWriteInt64(base, byteOffset, data) -OS_INLINE -void -OSWriteLittleInt16( - volatile void * base, - uintptr_t offset, - uint16_t data -) -{ - OSWriteSwapInt16(base, offset, data); -} +/* Functions for loading little endian to host endianess. */ -OS_INLINE -void -OSWriteLittleInt32( - volatile void * base, - uintptr_t offset, - uint32_t data -) -{ - OSWriteSwapInt32(base, offset, data); -} +#define OSReadLittleInt16(base, byteOffset) OSReadSwapInt16(base, byteOffset) +#define OSReadLittleInt32(base, byteOffset) OSReadSwapInt32(base, byteOffset) +#define OSReadLittleInt64(base, byteOffset) OSReadSwapInt64(base, byteOffset) -OS_INLINE -void -OSWriteLittleInt64( - volatile void * base, - uintptr_t offset, - uint64_t data -) -{ - OSWriteSwapInt64(base, offset, data); -} +/* Functions for storing host endianess to little endian. 
*/ + +#define OSWriteLittleInt16(base, byteOffset, data) OSWriteSwapInt16(base, byteOffset, data) +#define OSWriteLittleInt32(base, byteOffset, data) OSWriteSwapInt32(base, byteOffset, data) +#define OSWriteLittleInt64(base, byteOffset, data) OSWriteSwapInt64(base, byteOffset, data) /* Host endianess to big endian byte swapping macros for constants. */ @@ -241,32 +205,9 @@ OSWriteLittleInt64( /* Generic host endianess to big endian byte swapping functions. */ -OS_INLINE -uint16_t -OSSwapHostToBigInt16( - uint16_t data -) -{ - return data; -} - -OS_INLINE -uint32_t -OSSwapHostToBigInt32( - uint32_t data -) -{ - return data; -} - -OS_INLINE -uint64_t -OSSwapHostToBigInt64( - uint64_t data -) -{ - return data; -} +#define OSSwapHostToBigInt16(x) ((uint16_t)(x)) +#define OSSwapHostToBigInt32(x) ((uint32_t)(x)) +#define OSSwapHostToBigInt64(x) ((uint64_t)(x)) /* Host endianess to little endian byte swapping macros for constants. */ @@ -288,32 +229,9 @@ OSSwapHostToBigInt64( /* Generic big endian to host endianess byte swapping functions. */ -OS_INLINE -uint16_t -OSSwapBigToHostInt16( - uint16_t data -) -{ - return data; -} - -OS_INLINE -uint32_t -OSSwapBigToHostInt32( - uint32_t data -) -{ - return data; -} - -OS_INLINE -uint64_t -OSSwapBigToHostInt64( - uint64_t data -) -{ - return data; -} +#define OSSwapBigToHostInt16(x) ((uint16_t)(x)) +#define OSSwapBigToHostInt32(x) ((uint32_t)(x)) +#define OSSwapBigToHostInt64(x) ((uint64_t)(x)) /* Little endian to host endianess byte swapping macros for constants. */ @@ -331,137 +249,27 @@ OSSwapBigToHostInt64( /* Functions for loading big endian to host endianess. 
*/ -OS_INLINE -uint16_t -OSReadBigInt16( - const volatile void * base, - uintptr_t offset -) -{ - return OSReadSwapInt16(base, offset); -} - -OS_INLINE -uint32_t -OSReadBigInt32( - const volatile void * base, - uintptr_t offset -) -{ - return OSReadSwapInt32(base, offset); -} - -OS_INLINE -uint64_t -OSReadBigInt64( - const volatile void * base, - uintptr_t offset -) -{ - return OSReadSwapInt64(base, offset); -} +#define OSReadBigInt16(base, byteOffset) OSReadSwapInt16(base, byteOffset) +#define OSReadBigInt32(base, byteOffset) OSReadSwapInt32(base, byteOffset) +#define OSReadBigInt64(base, byteOffset) OSReadSwapInt64(base, byteOffset) /* Functions for storing host endianess to big endian. */ -OS_INLINE -void -OSWriteBigInt16( - volatile void * base, - uintptr_t offset, - uint16_t data -) -{ - OSWriteSwapInt16(base, offset, data); -} - -OS_INLINE -void -OSWriteBigInt32( - volatile void * base, - uintptr_t offset, - uint32_t data -) -{ - OSWriteSwapInt32(base, offset, data); -} - -OS_INLINE -void -OSWriteBigInt64( - volatile void * base, - uintptr_t offset, - uint64_t data -) -{ - OSWriteSwapInt64(base, offset, data); -} +#define OSWriteBigInt16(base, byteOffset, data) OSWriteSwapInt16(base, byteOffset, data) +#define OSWriteBigInt32(base, byteOffset, data) OSWriteSwapInt32(base, byteOffset, data) +#define OSWriteBigInt64(base, byteOffset, data) OSWriteSwapInt64(base, byteOffset, data) /* Functions for loading little endian to host endianess. 
*/ -OS_INLINE -uint16_t -OSReadLittleInt16( - const volatile void * base, - uintptr_t offset -) -{ - return *(volatile uint16_t *)((uintptr_t)base + offset); -} - -OS_INLINE -uint32_t -OSReadLittleInt32( - const volatile void * base, - uintptr_t offset -) -{ - return *(volatile uint32_t *)((uintptr_t)base + offset); -} - -OS_INLINE -uint64_t -OSReadLittleInt64( - const volatile void * base, - uintptr_t offset -) -{ - return *(volatile uint64_t *)((uintptr_t)base + offset); -} +#define OSReadLittleInt16(base, byteOffset) _OSReadInt16(base, byteOffset) +#define OSReadLittleInt32(base, byteOffset) _OSReadInt32(base, byteOffset) +#define OSReadLittleInt64(base, byteOffset) _OSReadInt64(base, byteOffset) /* Functions for storing host endianess to little endian. */ -OS_INLINE -void -OSWriteLittleInt16( - volatile void * base, - uintptr_t offset, - uint16_t data -) -{ - *(volatile uint16_t *)((uintptr_t)base + offset) = data; -} - -OS_INLINE -void -OSWriteLittleInt32( - volatile void * base, - uintptr_t offset, - uint32_t data -) -{ - *(volatile uint32_t *)((uintptr_t)base + offset) = data; -} - -OS_INLINE -void -OSWriteLittleInt64( - volatile void * base, - uintptr_t offset, - uint64_t data -) -{ - *(volatile uint64_t *)((uintptr_t)base + offset) = data; -} +#define OSWriteLittleInt16(base, byteOffset, data) _OSWriteInt16(base, byteOffset, data) +#define OSWriteLittleInt32(base, byteOffset, data) _OSWriteInt32(base, byteOffset, data) +#define OSWriteLittleInt64(base, byteOffset, data) _OSWriteInt64(base, byteOffset, data) /* Host endianess to big endian byte swapping macros for constants. */ @@ -483,32 +291,9 @@ OSWriteLittleInt64( /* Generic host endianess to little endian byte swapping functions. 
*/ -OS_INLINE -uint16_t -OSSwapHostToLittleInt16( - uint16_t data -) -{ - return data; -} - -OS_INLINE -uint32_t -OSSwapHostToLittleInt32( - uint32_t data -) -{ - return data; -} - -OS_INLINE -uint64_t -OSSwapHostToLittleInt64( - uint64_t data -) -{ - return data; -} +#define OSSwapHostToLittleInt16(x) ((uint16_t)(x)) +#define OSSwapHostToLittleInt32(x) ((uint32_t)(x)) +#define OSSwapHostToLittleInt64(x) ((uint64_t)(x)) /* Big endian to host endianess byte swapping macros for constants. */ @@ -530,32 +315,9 @@ OSSwapHostToLittleInt64( /* Generic little endian to host endianess byte swapping functions. */ -OS_INLINE -uint16_t -OSSwapLittleToHostInt16( - uint16_t data -) -{ - return data; -} - -OS_INLINE -uint32_t -OSSwapLittleToHostInt32( - uint32_t data -) -{ - return data; -} - -OS_INLINE -uint64_t -OSSwapLittleToHostInt64( - uint64_t data -) -{ - return data; -} +#define OSSwapLittleToHostInt16(x) ((uint16_t)(x)) +#define OSSwapLittleToHostInt32(x) ((uint32_t)(x)) +#define OSSwapLittleToHostInt64(x) ((uint64_t)(x)) #else #error Unknown endianess. diff --git a/libkern/libkern/OSCrossEndian.h b/libkern/libkern/OSCrossEndian.h index 0131455d1..0b1e5aa18 100644 --- a/libkern/libkern/OSCrossEndian.h +++ b/libkern/libkern/OSCrossEndian.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2006 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -52,27 +52,30 @@ #ifndef _LIBKERN_OSCROSSENDIAN_H #define _LIBKERN_OSCROSSENDIAN_H +#include + #if __ppc__ -static __inline__ int _OSRosettaCheck(void) +static __inline__ int +_OSRosettaCheck(void) { - int isCrossEndian; + int isCrossEndian = 0; + int val = 0; + size_t size = sizeof val; - __asm__ ( "b 0f\n" - " .long 0x14400004\n" - " li %0,1\n" - "0:" - : "=r" (isCrossEndian) : "0" (0) - ); + if (sysctlbyname("sysctl.proc_native", &val, &size, NULL, 0) == -1) + isCrossEndian = 0; + else + isCrossEndian = val ? 
0 : 1; - return isCrossEndian; + return isCrossEndian; } -#else +#else /* __ppc__ */ static __inline__ int _OSRosettaCheck(void) { return 0; } -#endif +#endif /* __ppc__ */ #define IF_ROSETTA() if (__builtin_expect(_OSRosettaCheck(), 0) ) diff --git a/libkern/libkern/c++/Makefile b/libkern/libkern/c++/Makefile index 3c98e5543..c2660a21b 100644 --- a/libkern/libkern/c++/Makefile +++ b/libkern/libkern/c++/Makefile @@ -27,6 +27,7 @@ DATAFILES = \ OSCPPDebug.h \ OSData.h \ OSDictionary.h \ + OSEndianTypes.h \ OSIterator.h \ OSLib.h \ OSMetaClass.h \ diff --git a/libkern/libkern/c++/OSEndianTypes.h b/libkern/libkern/c++/OSEndianTypes.h new file mode 100644 index 000000000..c427d5743 --- /dev/null +++ b/libkern/libkern/c++/OSEndianTypes.h @@ -0,0 +1,149 @@ +/* + * Copyright (c) 2005 Apple Computer, Inc. All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * The contents of this file constitute Original Code as defined in and + * are subject to the Apple Public Source License Version 1.1 (the + * "License"). You may not use this file except in compliance with the + * License. Please obtain a copy of the License at + * http://www.apple.com/publicsource and read it before using this file. + * + * This Original Code and all software distributed under the License are + * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the + * License for the specific language governing rights and limitations + * under the License. + * + * @APPLE_LICENSE_HEADER_END@ + * + * HISTORY + * gvdl 20050620 Created + */ + +/*! +@header OSEndianTypes +@abstract C++ inline types for byte swapping +@discussion +The OSEndianTypes consist of a number of types that are used very similarly to the traditional MacOS C scalar integers types, eg. UInt32 and SInt32. 
+@copyright 2005 Apple Computer, Inc. All rights reserved. +@updated 2005-07-25 +*/ + +// Header doc magic trick for simple documentation +#if 0 +/*! @typedef BigUInt16 +@abstract A Big-endian unsigned integer scalar size 16 - UInt16 */ +typedef class BigUInt16 BigUInt16; +#endif + +#if 0 +/*! @typedef BigSInt16 +@abstract A Big-endian signed integer scalar size 16 - SInt16 */ +typedef class BigSInt16 BigSInt16; + +/*! @typedef BigUInt32 +@abstract A Big-endian unsigned integer scalar size 32 - UInt32 */ +typedef class BigUInt32 BigUInt32; + +/*! @typedef BigSInt32 +@abstract A Big-endian signed integer scalar size 32 - SInt32 */ +typedef class BigSInt32 BigSInt32; + +/*! @typedef BigUInt64 +@abstract A Big-endian unsigned integer scalar size 64 - UInt64 */ +typedef class BigUInt64 BigUInt64; + +/*! @typedef BigSInt64 +@abstract A Big-endian signed integer scalar size 64 - SInt64 */ +typedef class BigSInt64 BigSInt64; + +/*! @typedef LittleUInt16 +@abstract A Little-endian unsigned integer scalar size 16 - UInt16 */ +typedef class LittleUInt16 LittleUInt16; + +/*! @typedef LittleSInt16 +@abstract A Little-endian signed integer scalar size 16 - SInt16 */ +typedef class LittleSInt16 LittleSInt16; + +/*! @typedef LittleUInt32 +@abstract A Little-endian unsigned integer scalar size 32 - UInt32 */ +typedef class LittleUInt32 LittleUInt32; + +/*! @typedef LittleSInt32 +@abstract A Little-endian signed integer scalar size 32 - SInt32 */ +typedef class LittleSInt32 LittleSInt32; + +/*! @typedef LittleUInt64 +@abstract A Little-endian unsigned integer scalar size 64 - UInt64 */ +typedef class LittleUInt64 LittleUInt64; + +/*! 
@typedef LittleSInt64 +@abstract A Little-endian signed integer scalar size 64 - SInt64 */ +typedef class LittleSInt64 LittleSInt64; + +*/ +#endif + +#ifndef _OS_OSENDIANHELPER_H +#define _OS_OSENDIANHELPER_H + +#if __cplusplus + +#include +#include + +// Probably should really be using templates, this is one of the few cases +// where they do make sense. But as the kernel is not allowed to export +// template based C++ APIs we have to use sophisticated macros instead +#define __OSEndianSignIntSizeDEF(argname, argend, argtype, argsize) { \ +public: \ + typedef argtype ## argsize Value; \ + \ +private: \ + typedef UInt ## argsize UValue; \ + UValue mValue; \ + \ + void writeValue(Value v) { \ + if (__builtin_constant_p(v)) \ + mValue = OSSwapHostTo ## argend ## ConstInt ## argsize(v); \ + else \ + OSWrite ## argend ## Int ## argsize(&mValue, 0, (UValue) v);\ + }; \ + \ + Value readValue() const { \ + return (Value) OSRead ## argend ## Int ## argsize(&mValue, 0); \ + }; \ + \ +public: \ + argname() { }; \ + \ + argname (Value v) { writeValue(v); }; \ + argname &operator = (Value v) { writeValue(v); return *this; } \ + \ + Value get() const { return readValue(); }; \ + operator Value () const { return readValue(); }; \ +} + +class BigUInt16 __OSEndianSignIntSizeDEF(BigUInt16, Big, UInt, 16); +class BigSInt16 __OSEndianSignIntSizeDEF(BigSInt16, Big, SInt, 16); +class BigUInt32 __OSEndianSignIntSizeDEF(BigUInt32, Big, UInt, 32); +class BigSInt32 __OSEndianSignIntSizeDEF(BigSInt32, Big, SInt, 32); +class BigUInt64 __OSEndianSignIntSizeDEF(BigUInt64, Big, UInt, 64); +class BigSInt64 __OSEndianSignIntSizeDEF(BigSInt64, Big, SInt, 64); +class LittleUInt16 __OSEndianSignIntSizeDEF(LittleUInt16, Little, UInt, 16); +class LittleSInt16 __OSEndianSignIntSizeDEF(LittleSInt16, Little, SInt, 16); +class LittleUInt32 __OSEndianSignIntSizeDEF(LittleUInt32, Little, UInt, 32); +class LittleSInt32 __OSEndianSignIntSizeDEF(LittleSInt32, Little, SInt, 32); +class LittleUInt64 
__OSEndianSignIntSizeDEF(LittleUInt64, Little, UInt, 64); +class LittleSInt64 __OSEndianSignIntSizeDEF(LittleSInt64, Little, SInt, 64); + +#undef __OSEndianSignIntSizeDEF + +#endif /* __cplusplus */ + +#endif /* ! _OS_OSENDIANHELPER_H */ + + diff --git a/libkern/libkern/c++/OSMetaClass.h b/libkern/libkern/c++/OSMetaClass.h index 02982c302..5f2615c4e 100644 --- a/libkern/libkern/c++/OSMetaClass.h +++ b/libkern/libkern/c++/OSMetaClass.h @@ -34,7 +34,7 @@ class OSSymbol; class OSDictionary; class OSSerialize; -#if __GNUC__ < 3 +#if !defined(__ppc__) || __GNUC__ < 3 #define APPLE_KEXT_COMPATIBILITY #else #define APPLE_KEXT_COMPATIBILITY __attribute__ ((apple_kext_compatibility)) @@ -359,7 +359,7 @@ class OSMetaClass : private OSMetaClassBase // Needs to be overriden as NULL as all OSMetaClass objects are allocated // statically at compile time, don't accidently try to free them. - void operator delete(void *mem, size_t size) { }; + void operator delete(void *, size_t) { }; public: static const OSMetaClass * const metaClass; diff --git a/libkern/libkern/c++/OSObject.h b/libkern/libkern/c++/OSObject.h index d7ed685bc..a39913302 100644 --- a/libkern/libkern/c++/OSObject.h +++ b/libkern/libkern/c++/OSObject.h @@ -162,6 +162,8 @@ class OSObject : public OSMetaClassBase OSMetaClassDeclareReservedUnused(OSObject, 13); OSMetaClassDeclareReservedUnused(OSObject, 14); OSMetaClassDeclareReservedUnused(OSObject, 15); + +#ifdef __ppc__ OSMetaClassDeclareReservedUnused(OSObject, 16); OSMetaClassDeclareReservedUnused(OSObject, 17); OSMetaClassDeclareReservedUnused(OSObject, 18); @@ -178,6 +180,7 @@ class OSObject : public OSMetaClassBase OSMetaClassDeclareReservedUnused(OSObject, 29); OSMetaClassDeclareReservedUnused(OSObject, 30); OSMetaClassDeclareReservedUnused(OSObject, 31); +#endif }; #endif /* !_LIBKERN_OSOBJECT_H */ diff --git a/libkern/libkern/i386/OSByteOrder.h b/libkern/libkern/i386/OSByteOrder.h index eb6576624..5783957f0 100644 --- a/libkern/libkern/i386/OSByteOrder.h +++ 
b/libkern/libkern/i386/OSByteOrder.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 1999-2003 Apple Computer, Inc. All rights reserved. + * Copyright (c) 1999-2005 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -26,7 +26,13 @@ #include #if !defined(OS_INLINE) +# if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L # define OS_INLINE static inline +# elif defined(__MWERKS__) || defined(__cplusplus) +# define OS_INLINE static inline +# else +# define OS_INLINE static __inline__ +# endif #endif /* Generic byte swapping functions. */ @@ -37,8 +43,7 @@ _OSSwapInt16( uint16_t data ) { - __asm__ ("xchgb %b0, %h0" : "+q" (data)); - return data; + return ((data << 8) | (data >> 8)); } OS_INLINE @@ -47,28 +52,36 @@ _OSSwapInt32( uint32_t data ) { - __asm__ ("bswap %0" : "+r" (data)); + __asm__ ("bswap %0" : "+r" (data)); return data; } +#if defined(__i386__) OS_INLINE uint64_t _OSSwapInt64( uint64_t data ) { - union { - uint64_t ull; - uint32_t ul[2]; - } u; - - /* This actually generates the best code */ - u.ul[0] = data >> 32; - u.ul[1] = data & 0xffffffff; - u.ul[0] = _OSSwapInt32(u.ul[0]); - u.ul[1] = _OSSwapInt32(u.ul[1]); - return u.ull; + __asm__ ("bswap %%eax\n\t" + "bswap %%edx\n\t" + "xchgl %%eax, %%edx" + : "+A" (data)); + return data; } +#elif defined(__x86_64__) +OS_INLINE +uint64_t +_OSSwapInt64( + uint64_t data +) +{ + __asm__ ("bswap %0" : "+r" (data)); + return data; +} +#else +#error Unknown architecture +#endif /* Functions for byte reversed loads. 
*/ @@ -76,12 +89,12 @@ OS_INLINE uint16_t OSReadSwapInt16( const volatile void * base, - uintptr_t offset + uintptr_t byteOffset ) { uint16_t result; - result = *(volatile uint16_t *)((uintptr_t)base + offset); + result = *(volatile uint16_t *)((uintptr_t)base + byteOffset); return _OSSwapInt16(result); } @@ -89,12 +102,12 @@ OS_INLINE uint32_t OSReadSwapInt32( const volatile void * base, - uintptr_t offset + uintptr_t byteOffset ) { uint32_t result; - result = *(volatile uint32_t *)((uintptr_t)base + offset); + result = *(volatile uint32_t *)((uintptr_t)base + byteOffset); return _OSSwapInt32(result); } @@ -102,21 +115,13 @@ OS_INLINE uint64_t OSReadSwapInt64( const volatile void * base, - uintptr_t offset + uintptr_t byteOffset ) { - const volatile uint32_t * inp; - union ullc { - uint64_t ull; - uint32_t ul[2]; - } outv; - - inp = (const volatile uint32_t *)((uintptr_t)base + offset); - outv.ul[0] = inp[1]; - outv.ul[1] = inp[0]; - outv.ul[0] = _OSSwapInt32(outv.ul[0]); - outv.ul[1] = _OSSwapInt32(outv.ul[1]); - return outv.ull; + uint64_t result; + + result = *(volatile uint64_t *)((uintptr_t)base + byteOffset); + return _OSSwapInt64(result); } /* Functions for byte reversed stores. 
*/ @@ -125,33 +130,33 @@ OS_INLINE void OSWriteSwapInt16( volatile void * base, - uintptr_t offset, + uintptr_t byteOffset, uint16_t data ) { - *(volatile uint16_t *)((uintptr_t)base + offset) = _OSSwapInt16(data); + *(volatile uint16_t *)((uintptr_t)base + byteOffset) = _OSSwapInt16(data); } OS_INLINE void OSWriteSwapInt32( volatile void * base, - uintptr_t offset, + uintptr_t byteOffset, uint32_t data ) { - *(volatile uint32_t *)((uintptr_t)base + offset) = _OSSwapInt32(data); + *(volatile uint32_t *)((uintptr_t)base + byteOffset) = _OSSwapInt32(data); } OS_INLINE void OSWriteSwapInt64( volatile void * base, - uintptr_t offset, + uintptr_t byteOffset, uint64_t data ) { - *(volatile uint64_t *)((uintptr_t)base + offset) = _OSSwapInt64(data); + *(volatile uint64_t *)((uintptr_t)base + byteOffset) = _OSSwapInt64(data); } #endif /* ! _OS_OSBYTEORDERI386_H */ diff --git a/libkern/libkern/machine/OSByteOrder.h b/libkern/libkern/machine/OSByteOrder.h index 5b96426ba..97f2a6fbd 100644 --- a/libkern/libkern/machine/OSByteOrder.h +++ b/libkern/libkern/machine/OSByteOrder.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2003 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -19,13 +19,6 @@ * * @APPLE_LICENSE_HEADER_END@ */ -/* - * Copyright (c) 1999 Apple Computer, Inc. All rights reserved. - * - * HISTORY - * - */ - #ifndef _OS_OSBYTEORDERMACHINE_H #define _OS_OSBYTEORDERMACHINE_H @@ -33,164 +26,112 @@ #include #if !defined(OS_INLINE) +# if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L +# define OS_INLINE static inline +# elif defined(__MWERKS__) || defined(__cplusplus) # define OS_INLINE static inline +# else +# define OS_INLINE static __inline__ +# endif #endif -/* Functions for byte reversed loads. */ +/* Generic byte swapping functions. 
*/ OS_INLINE uint16_t -OSReadSwapInt16( - volatile void * base, - uintptr_t offset +_OSSwapInt16( + uint16_t data ) { - union sconv { - uint16_t us; - uint8_t uc[2]; - } *inp, outv; - inp = (union sconv *)((uint8_t *)base + offset); - outv.uc[0] = inp->uc[1]; - outv.uc[1] = inp->uc[0]; - return (outv.us); + return OSSwapConstInt16(data); } OS_INLINE uint32_t -OSReadSwapInt32( - volatile void * base, - uintptr_t offset +_OSSwapInt32( + uint32_t data ) { - union lconv { - uint32_t ul; - uint8_t uc[4]; - } *inp, outv; - inp = (union lconv *)((uint8_t *)base + offset); - outv.uc[0] = inp->uc[3]; - outv.uc[1] = inp->uc[2]; - outv.uc[2] = inp->uc[1]; - outv.uc[3] = inp->uc[0]; - return (outv.ul); + return OSSwapConstInt32(data); } OS_INLINE uint64_t -OSReadSwapInt64( - volatile void * base, - uintptr_t offset +_OSSwapInt64( + uint64_t data ) { - union llconv { - uint64_t ull; - uint8_t uc[8]; - } *inp, outv; - inp = (union llconv *)((uint8_t *)base + offset); - outv.uc[0] = inp->uc[7]; - outv.uc[1] = inp->uc[6]; - outv.uc[2] = inp->uc[5]; - outv.uc[3] = inp->uc[4]; - outv.uc[4] = inp->uc[3]; - outv.uc[5] = inp->uc[2]; - outv.uc[6] = inp->uc[1]; - outv.uc[7] = inp->uc[0]; - return (outv.ull); + return OSSwapConstInt64(data); } -/* Functions for byte reversed stores. */ +/* Functions for byte reversed loads. 
*/ OS_INLINE -void -OSWriteSwapInt16( - volatile void * base, - uintptr_t offset, - uint16_t data +uint16_t +OSReadSwapInt16( + const volatile void * base, + uintptr_t byteOffset ) { - union sconv { - uint16_t us; - uint8_t uc[2]; - } *inp, *outp; - inp = (union sconv *)((uint8_t *)base + offset); - outp = (union sconv *)&data; - outp->uc[0] = inp->uc[1]; - outp->uc[1] = inp->uc[0]; + uint16_t data = *(volatile uint16_t *)((uintptr_t)base + byteOffset); + return _OSSwapInt16(data); } OS_INLINE -void -OSWriteSwapInt32( - volatile void * base, - uintptr_t offset, - uint32_t data +uint32_t +OSReadSwapInt32( + const volatile void * base, + uintptr_t byteOffset ) { - union lconv { - uint32_t ul; - uint8_t uc[4]; - } *inp, *outp; - inp = (union lconv *)((uint8_t *)base + offset); - outp = (union lconv *)&data; - outp->uc[0] = inp->uc[3]; - outp->uc[1] = inp->uc[2]; - outp->uc[2] = inp->uc[1]; - outp->uc[3] = inp->uc[0]; + uint32_t data = *(volatile uint32_t *)((uintptr_t)base + byteOffset); + return _OSSwapInt32(data); } OS_INLINE -void -OSWriteSwapInt64( - volatile void * base, - uintptr_t offset, - uint64_t data +uint64_t +OSReadSwapInt64( + const volatile void * base, + uintptr_t byteOffset ) { - union llconv { - uint64_t ull; - uint8_t uc[8]; - } *inp, *outp; - inp = (union llconv *)((uint8_t *)base + offset); - outp = (union llconv *)&data; - outp->uc[0] = inp->uc[7]; - outp->uc[1] = inp->uc[6]; - outp->uc[2] = inp->uc[5]; - outp->uc[3] = inp->uc[4]; - outp->uc[4] = inp->uc[3]; - outp->uc[5] = inp->uc[2]; - outp->uc[6] = inp->uc[1]; - outp->uc[7] = inp->uc[0]; + uint64_t data = *(volatile uint64_t *)((uintptr_t)base + byteOffset); + return _OSSwapInt64(data); } -/* Generic byte swapping functions. */ +/* Functions for byte reversed stores. 
*/ OS_INLINE -uint16_t -_OSSwapInt16( +void +OSWriteSwapInt16( + volatile void * base, + uintptr_t byteOffset, uint16_t data ) { - uint16_t temp = data; - return OSReadSwapInt16(&temp, 0); + *(volatile uint16_t *)((uintptr_t)base + byteOffset) = _OSSwapInt16(data); } OS_INLINE -uint32_t -_OSSwapInt32( +void +OSWriteSwapInt32( + volatile void * base, + uintptr_t byteOffset, uint32_t data ) { - uint32_t temp = data; - return OSReadSwapInt32(&temp, 0); + *(volatile uint32_t *)((uintptr_t)base + byteOffset) = _OSSwapInt32(data); } OS_INLINE -uint64_t -_OSSwapInt64( - uint64_t data +void +OSWriteSwapInt64( + volatile void * base, + uintptr_t byteOffset, + uint64_t data ) { - uint64_t temp = data; - return OSReadSwapInt64(&temp, 0); + *(volatile uint64_t *)((uintptr_t)base + byteOffset) = _OSSwapInt64(data); } #endif /* ! _OS_OSBYTEORDERMACHINE_H */ diff --git a/libkern/libkern/ppc/OSByteOrder.h b/libkern/libkern/ppc/OSByteOrder.h index 3fa0081bc..e545f5236 100644 --- a/libkern/libkern/ppc/OSByteOrder.h +++ b/libkern/libkern/ppc/OSByteOrder.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -19,13 +19,6 @@ * * @APPLE_LICENSE_HEADER_END@ */ -/* - * Copyright (c) 1999 Apple Computer, Inc. All rights reserved. - * - * HISTORY - * - */ - #ifndef _OS_OSBYTEORDERPPC_H #define _OS_OSBYTEORDERPPC_H @@ -33,7 +26,13 @@ #include #if !defined(OS_INLINE) +# if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L # define OS_INLINE static inline +# elif defined(__MWERKS__) || defined(__cplusplus) +# define OS_INLINE static inline +# else +# define OS_INLINE static __inline__ +# endif #endif /* Functions for byte reversed loads. 
*/ @@ -41,50 +40,53 @@ OS_INLINE uint16_t OSReadSwapInt16( - const volatile void * base, - uintptr_t offset + const volatile void * base, + uintptr_t byteOffset ) { uint16_t result; - __asm__ volatile("lhbrx %0, %1, %2" - : "=r" (result) - : "b%" (base), "r" (offset) - : "memory"); + volatile uint16_t *addr = (volatile uint16_t *)((uintptr_t)base + byteOffset); + + __asm__ ("lhbrx %0, %2, %1" + : "=r" (result) + : "r" (base), "bO" (byteOffset), "m" (*addr)); return result; } OS_INLINE uint32_t OSReadSwapInt32( - const volatile void * base, - uintptr_t offset + const volatile void * base, + uintptr_t byteOffset ) { uint32_t result; - __asm__ volatile("lwbrx %0, %1, %2" - : "=r" (result) - : "b%" (base), "r" (offset) - : "memory"); + volatile uint32_t *addr = (volatile uint32_t *)((uintptr_t)base + byteOffset); + + __asm__ ("lwbrx %0, %2, %1" + : "=r" (result) + : "r" (base), "bO" (byteOffset), "m" (*addr)); return result; } OS_INLINE uint64_t OSReadSwapInt64( - const volatile void * base, - uintptr_t offset + const volatile void * base, + uintptr_t byteOffset ) { - const volatile uint64_t * inp; - union ullc { - uint64_t ull; - uint32_t ul[2]; - } outv; - - inp = (const volatile uint64_t *)base; - outv.ul[0] = OSReadSwapInt32(inp, offset + 4); - outv.ul[1] = OSReadSwapInt32(inp, offset); - return outv.ull; + volatile uint64_t *addr = (volatile uint64_t *)((uintptr_t)base + byteOffset); + union { + uint64_t u64; + uint32_t u32[2]; + } u; + + __asm__ ("lwbrx %0, %3, %2\n\t" + "lwbrx %1, %4, %2" + : "=&r" (u.u32[1]), "=r" (u.u32[0]) + : "r" (base), "bO" (byteOffset), "b" (byteOffset + 4), "m" (*addr)); + return u.u64; } /* Functions for byte reversed stores. 
*/ @@ -92,49 +94,49 @@ OSReadSwapInt64( OS_INLINE void OSWriteSwapInt16( - volatile void * base, - uintptr_t offset, - uint16_t data + volatile void * base, + uintptr_t byteOffset, + uint16_t data ) { - __asm__ volatile("sthbrx %0, %1, %2" - : - : "r" (data), "b%" (base), "r" (offset) - : "memory"); + volatile uint16_t *addr = (volatile uint16_t *)((uintptr_t)base + byteOffset); + + __asm__ ("sthbrx %1, %3, %2" + : "=m" (*addr) + : "r" (data), "r" (base), "bO" (byteOffset)); } OS_INLINE void OSWriteSwapInt32( - volatile void * base, - uintptr_t offset, - uint32_t data + volatile void * base, + uintptr_t byteOffset, + uint32_t data ) { - __asm__ volatile("stwbrx %0, %1, %2" - : - : "r" (data), "b%" (base), "r" (offset) - : "memory" ); + volatile uint32_t *addr = (volatile uint32_t *)((uintptr_t)base + byteOffset); + + __asm__ ("stwbrx %1, %3, %2" + : "=m" (*addr) + : "r" (data), "r" (base), "bO" (byteOffset)); } OS_INLINE void OSWriteSwapInt64( - volatile void * base, - uintptr_t offset, - uint64_t data + volatile void * base, + uintptr_t byteOffset, + uint64_t data ) { - volatile uint64_t * outp; - volatile union ullc { - uint64_t ull; - uint32_t ul[2]; - } *inp; - - outp = (volatile uint64_t *)base; - inp = (volatile union ullc *)&data; - OSWriteSwapInt32(outp, offset, inp->ul[1]); - OSWriteSwapInt32(outp, offset + 4, inp->ul[0]); + volatile uint64_t *addr = (volatile uint64_t *)((uintptr_t)base + byteOffset); + uint32_t hi = data >> 32; + uint32_t lo = data & 0xffffffff; + + __asm__ ("stwbrx %1, %4, %3\n\t" + "stwbrx %2, %5, %3" + : "=m" (*addr) + : "r" (lo), "r" (hi), "r" (base), "bO" (byteOffset), "b" (byteOffset + 4)); } /* Generic byte swapping functions. 
*/ @@ -142,31 +144,28 @@ OSWriteSwapInt64( OS_INLINE uint16_t _OSSwapInt16( - uint16_t data + uint16_t data ) { - uint16_t temp = data; - return OSReadSwapInt16(&temp, 0); + return OSReadSwapInt16(&data, 0); } OS_INLINE uint32_t _OSSwapInt32( - uint32_t data + uint32_t data ) { - uint32_t temp = data; - return OSReadSwapInt32(&temp, 0); + return OSReadSwapInt32(&data, 0); } OS_INLINE uint64_t _OSSwapInt64( - uint64_t data + uint64_t data ) { - uint64_t temp = data; - return OSReadSwapInt64(&temp, 0); + return OSReadSwapInt64(&data, 0); } #endif /* ! _OS_OSBYTEORDERPPC_H */ diff --git a/libkern/mach-o/loader.h b/libkern/mach-o/loader.h index 59450ff88..85c23d70a 100644 --- a/libkern/mach-o/loader.h +++ b/libkern/mach-o/loader.h @@ -44,12 +44,6 @@ */ #include -/* - * XXX historically, we have not included this header. Continue to not do so. - * - * #include - */ - /* * The mach header appears at the very beginning of the object file; it * is the same for both 32-bit and 64-bit architectures. 
@@ -66,11 +60,11 @@ struct mach_header { /* Constant for the magic field of the mach_header (32-bit architectures) */ #define MH_MAGIC 0xfeedface /* the mach magic number */ -#define MH_CIGAM NXSwapInt(MH_MAGIC) +#define MH_CIGAM 0xcefaedfe /* Constant for the magic field of the mach_header_64 (64-bit architectures) */ #define MH_MAGIC_64 0xfeedfacf /* the 64-bit mach magic number */ -#define MH_CIGAM_64 NXSwapInt(MH_MAGIC_64) +#define MH_CIGAM_64 0xcffaedfe /* Constants for the cmd field of new load commands, the type */ #define LC_SEGMENT_64 0x19 /* 64-bit segment of this file to be mapped */ diff --git a/libsa/catalogue.cpp b/libsa/catalogue.cpp index 554fa5a3a..abaf609dd 100644 --- a/libsa/catalogue.cpp +++ b/libsa/catalogue.cpp @@ -23,6 +23,7 @@ #include #include #include +#include #include #include #include @@ -111,6 +112,7 @@ bool validateExtensionDict(OSDictionary * extension, int index) { bool id_missing = false; bool is_kernel_resource = false; bool has_executable = false; + bool ineligible_for_safe_boot = false; OSString * bundleIdentifier = NULL; // do not release OSObject * rawValue = NULL; // do not release OSString * stringValue = NULL; // do not release @@ -121,6 +123,7 @@ bool validateExtensionDict(OSDictionary * extension, int index) { OSString * key = NULL; // do not release VERS_version vers; VERS_version compatible_vers; + char namep[16]; // unused but needed for PE_parse_boot_arg() // Info dict is a dictionary if (!OSDynamicCast(OSDictionary, extension)) { @@ -343,9 +346,10 @@ bool validateExtensionDict(OSDictionary * extension, int index) { keyIterator = NULL; } - // OSBundleRequired is a legal value - *not* required at boot time - // so we can do install CDs and the like with mkext files containing - // all normally-used drivers. + // OSBundleRequired, if present, must have a legal value. + // If it is not present and if we are safe-booting, + // then the kext is not eligible. 
+ // rawValue = extension->getObject("OSBundleRequired"); if (rawValue) { stringValue = OSDynamicCast(OSString, rawValue); @@ -363,6 +367,10 @@ bool validateExtensionDict(OSDictionary * extension, int index) { goto finish; } + } else if (PE_parse_boot_arg("-x", namep)) { /* safe boot */ + ineligible_for_safe_boot = true; + result = false; + goto finish; } @@ -370,19 +378,24 @@ bool validateExtensionDict(OSDictionary * extension, int index) { if (keyIterator) keyIterator->release(); if (!result) { - if (not_a_dict) { + if (ineligible_for_safe_boot) { + IOLog(VTYELLOW "Skipping extension \"%s\" during safe boot " + "(no OSBundleRequired property)\n" + VTRESET, + bundleIdentifier->getCStringNoCopy()); + } else if (not_a_dict) { if (index > -1) { - IOLog(VTYELLOW "mkext entry %d:." VTRESET, index); + IOLog(VTYELLOW "mkext entry %d: " VTRESET, index); } else { - IOLog(VTYELLOW "kernel extension" VTRESET); + IOLog(VTYELLOW "kernel extension " VTRESET); } IOLog(VTYELLOW "info dictionary isn't a dictionary\n" VTRESET); } else if (id_missing) { if (index > -1) { - IOLog(VTYELLOW "mkext entry %d:." 
VTRESET, index); + IOLog(VTYELLOW "mkext entry %d: " VTRESET, index); } else { - IOLog(VTYELLOW "kernel extension" VTRESET); + IOLog(VTYELLOW "kernel extension " VTRESET); } IOLog(VTYELLOW "\"CFBundleIdentifier\" property is " "missing or not a string\n" @@ -706,13 +719,12 @@ OSDictionary * readExtension(OSDictionary * propertyDict, bootxDriverDataObject->getBytesNoCopy(0, sizeof(MemoryMapFileInfo)); #if defined (__ppc__) - dataBuffer = (BootxDriverInfo *)ml_static_ptovirt( - driverInfo->paddr); + dataBuffer = (BootxDriverInfo *)ml_static_ptovirt(driverInfo->paddr); #elif defined (__i386__) - dataBuffer = (BootxDriverInfo *)driverInfo->paddr; - dataBuffer->plistAddr = ml_static_ptovirt(dataBuffer->plistAddr); + dataBuffer = (BootxDriverInfo *)ml_boot_ptovirt(driverInfo->paddr); + dataBuffer->plistAddr = (char *)ml_boot_ptovirt((vm_address_t)dataBuffer->plistAddr); if (dataBuffer->moduleAddr) - dataBuffer->moduleAddr = ml_static_ptovirt(dataBuffer->moduleAddr); + dataBuffer->moduleAddr = (void *)ml_boot_ptovirt((vm_address_t)dataBuffer->moduleAddr); #else #error unsupported architecture #endif @@ -801,7 +813,7 @@ OSDictionary * readExtension(OSDictionary * propertyDict, finish: if (loaded_kmod) { - kfree((unsigned int)loaded_kmod, sizeof(kmod_info_t)); + kfree(loaded_kmod, sizeof(kmod_info_t)); } // do not release bootxDriverDataObject @@ -951,7 +963,7 @@ bool extractExtensionsFromArchive(MemoryMapFileInfo * mkext_file_info, #if defined (__ppc__) mkext_data = (mkext_header *)mkext_file_info->paddr; #elif defined (__i386__) - mkext_data = (mkext_header *)ml_static_ptovirt(mkext_file_info->paddr); + mkext_data = (mkext_header *)ml_boot_ptovirt(mkext_file_info->paddr); #else #error unsupported architecture #endif @@ -983,6 +995,16 @@ bool extractExtensionsFromArchive(MemoryMapFileInfo * mkext_file_info, goto finish; } + IORegistryEntry * root = IORegistryEntry::getRegistryRoot(); + assert(root); + OSData * checksumObj = OSData::withBytes((void *)&checksum, + 
sizeof(checksum)); + assert(checksumObj); + if (checksumObj) { + root->setProperty(kIOStartupMkextCRC, checksumObj); + checksumObj->release(); + } + /* If the MKEXT archive isn't fat, check that the CPU type & subtype * match that of the running kernel. */ @@ -1024,7 +1046,7 @@ bool extractExtensionsFromArchive(MemoryMapFileInfo * mkext_file_info, i++) { if (loaded_kmod) { - kfree((unsigned int)loaded_kmod, sizeof(kmod_info_t)); + kfree(loaded_kmod, sizeof(kmod_info_t)); loaded_kmod = 0; } @@ -1202,7 +1224,7 @@ bool extractExtensionsFromArchive(MemoryMapFileInfo * mkext_file_info, finish: - if (loaded_kmod) kfree((unsigned int)loaded_kmod, sizeof(kmod_info_t)); + if (loaded_kmod) kfree(loaded_kmod, sizeof(kmod_info_t)); if (driverPlistDataObject) { kmem_free(kernel_map, (unsigned int)driverPlistDataObject->getBytesNoCopy(), diff --git a/libsa/kext.cpp b/libsa/kext.cpp index 70defb292..e9c8f0954 100644 --- a/libsa/kext.cpp +++ b/libsa/kext.cpp @@ -166,7 +166,6 @@ bool getKext( if (!uncompressModule(compressedCode, &driverCode)) { IOLog("extension \"%s\": couldn't uncompress code\n", bundleid); - LOG_DELAY(1); result = false; goto finish; } @@ -317,7 +316,7 @@ bool kextIsDependency(const char * kext_name, char * is_kernel) { extDict->getObject("compressedCode")); if ((driverCode || compressedCode) && is_kernel && *is_kernel) { - *is_kernel = 2; + *is_kernel = 2; } if (!driverCode && !compressedCode && !isKernelResourceObj) { @@ -333,8 +332,8 @@ bool kextIsDependency(const char * kext_name, char * is_kernel) { /********************************************************************* *********************************************************************/ static bool -figureDependenciesForKext(OSDictionary * kextPlist, - OSDictionary * dependencies, +addDependenciesForKext(OSDictionary * kextPlist, + OSArray * dependencyList, OSString * trueParent, Boolean skipKernelDependencies) { @@ -344,6 +343,7 @@ figureDependenciesForKext(OSDictionary * kextPlist, OSDictionary * 
libraries = 0; // don't release OSCollectionIterator * keyIterator = 0; // must release OSString * libraryName = 0; // don't release + OSString * dependentName = 0; // don't release kextName = OSDynamicCast(OSString, kextPlist->getObject("CFBundleIdentifier")); @@ -367,6 +367,8 @@ figureDependenciesForKext(OSDictionary * kextPlist, goto finish; } + dependentName = trueParent ? trueParent : kextName; + while ( (libraryName = OSDynamicCast(OSString, keyIterator->getNextObject())) ) { @@ -383,12 +385,15 @@ figureDependenciesForKext(OSDictionary * kextPlist, } else { char is_kernel_component; - if (!kextIsDependency(libraryName->getCStringNoCopy(), &is_kernel_component)) + if (!kextIsDependency(libraryName->getCStringNoCopy(), + &is_kernel_component)) { + is_kernel_component = false; + } if (!skipKernelDependencies || !is_kernel_component) { - dependencies->setObject(libraryName, - trueParent ? trueParent : kextName); + dependencyList->setObject(dependentName); + dependencyList->setObject(libraryName); } if (!hasDirectKernelDependency && is_kernel_component) { hasDirectKernelDependency = true; @@ -396,11 +401,22 @@ figureDependenciesForKext(OSDictionary * kextPlist, } } if (!hasDirectKernelDependency) { + const OSSymbol * kernelName = 0; + /* a kext without any kernel dependency is assumed dependent on 6.0 */ - dependencies->setObject("com.apple.kernel.libkern", - trueParent ? 
trueParent : kextName); + dependencyList->setObject(dependentName); + + kernelName = OSSymbol::withCString("com.apple.kernel.libkern"); + if (!kernelName) { + // XXX: Add log message + result = false; + goto finish; + } + dependencyList->setObject(kernelName); + kernelName->release(); + IOLog("Extension \"%s\" has no kernel dependency.\n", - kextName->getCStringNoCopy()); + kextName->getCStringNoCopy()); } finish: @@ -446,14 +462,8 @@ bool add_dependencies_for_kmod(const char * kmod_name, dgraph_t * dgraph) { bool result = true; OSDictionary * kextPlist = 0; // don't release - OSDictionary * workingDependencies = 0; // must release - OSDictionary * pendingDependencies = 0; // must release - OSDictionary * swapDict = 0; // don't release - OSString * dependentName = 0; // don't release - const char * dependent_name = 0; // don't free - OSString * libraryName = 0; // don't release - const char * library_name = 0; // don't free - OSCollectionIterator * dependencyIterator = 0; // must release + unsigned int index = 0; + OSArray * dependencyList = 0; // must release unsigned char * code = 0; unsigned long code_length = 0; bool code_is_kmem = false; @@ -461,9 +471,11 @@ bool add_dependencies_for_kmod(const char * kmod_name, dgraph_t * dgraph) char is_kernel_component = false; dgraph_entry_t * dgraph_entry = 0; // don't free dgraph_entry_t * dgraph_dependency = 0; // don't free - unsigned int graph_depth = 0; bool kext_is_dependency = true; + /***** + * Set up the root kmod. 
+ */ if (!getKext(kmod_name, &kextPlist, &code, &code_length, &code_is_kmem)) { IOLog("can't find extension %s\n", kmod_name); @@ -495,8 +507,7 @@ bool add_dependencies_for_kmod(const char * kmod_name, dgraph_t * dgraph) } // pass ownership of code to kld patcher - if (code) - { + if (code) { if (kload_map_entry(dgraph_entry) != kload_error_none) { IOLog("can't map %s in preparation for loading\n", kmod_name); result = false; @@ -509,95 +520,78 @@ bool add_dependencies_for_kmod(const char * kmod_name, dgraph_t * dgraph) code_length = 0; code_is_kmem = false; - workingDependencies = OSDictionary::withCapacity(5); - if (!workingDependencies) { - IOLog("memory allocation failure\n"); - result = false; - goto finish; - } - - pendingDependencies = OSDictionary::withCapacity(5); - if (!pendingDependencies) { + /***** + * Now handle all the dependencies. + */ + dependencyList = OSArray::withCapacity(5); + if (!dependencyList) { IOLog("memory allocation failure\n"); result = false; goto finish; } - if (!figureDependenciesForKext(kextPlist, workingDependencies, NULL, false)) { + index = 0; + if (!addDependenciesForKext(kextPlist, dependencyList, NULL, false)) { IOLog("can't determine immediate dependencies for extension %s\n", kmod_name); result = false; goto finish; } - graph_depth = 0; - while (workingDependencies->getCount()) { - if (graph_depth > 255) { + /* IMPORTANT: loop condition gets list count every time through, as the + * array CAN change each iteration. + */ + for (index = 0; index < dependencyList->getCount(); index += 2) { + OSString * dependentName = 0; + OSString * libraryName = 0; + const char * dependent_name = 0; + const char * library_name = 0; + + /* 255 is an arbitrary limit. Multiplied by 2 because the dependency + * list is stocked with pairs (dependent -> dependency). 
+ */ + if (index > (2 * 255)) { IOLog("extension dependency graph ridiculously long, indicating a loop\n"); result = false; goto finish; } - if (dependencyIterator) { - dependencyIterator->release(); - dependencyIterator = 0; - } + dependentName = OSDynamicCast(OSString, + dependencyList->getObject(index)); + libraryName = OSDynamicCast(OSString, + dependencyList->getObject(index + 1)); - dependencyIterator = OSCollectionIterator::withCollection( - workingDependencies); - if (!dependencyIterator) { - IOLog("memory allocation failure\n"); + if (!dependentName || !libraryName) { + IOLog("malformed dependency list\n"); result = false; goto finish; } - while ( (libraryName = - OSDynamicCast(OSString, dependencyIterator->getNextObject())) ) { + dependent_name = dependentName->getCStringNoCopy(); + library_name = libraryName->getCStringNoCopy(); - library_name = libraryName->getCStringNoCopy(); + if (!getKext(library_name, &kextPlist, NULL, NULL, NULL)) { - dependentName = OSDynamicCast(OSString, - workingDependencies->getObject(libraryName)); - - dependent_name = dependentName->getCStringNoCopy(); + IOLog("can't find extension %s\n", library_name); + result = false; + goto finish; + } + OSString * string = OSDynamicCast(OSString, + kextPlist->getObject("OSBundleSharedExecutableIdentifier")); + if (string) { + library_name = string->getCStringNoCopy(); if (!getKext(library_name, &kextPlist, NULL, NULL, NULL)) { IOLog("can't find extension %s\n", library_name); result = false; goto finish; } + } - OSString * string; - if ((string = OSDynamicCast(OSString, - kextPlist->getObject("OSBundleSharedExecutableIdentifier")))) - { - library_name = string->getCStringNoCopy(); - if (!getKext(library_name, &kextPlist, NULL, NULL, NULL)) { - IOLog("can't find extension %s\n", library_name); - result = false; - goto finish; - } - } - - kext_is_dependency = kextIsDependency(library_name, - &is_kernel_component); - - if (!kext_is_dependency) { - - /* For binaryless kexts, add a new 
pending dependency from the - * original dependent onto the dependencies of the current, - * binaryless, dependency. - */ - if (!figureDependenciesForKext(kextPlist, pendingDependencies, - dependentName, true)) { + kext_is_dependency = kextIsDependency(library_name, + &is_kernel_component); - IOLog("can't determine immediate dependencies for extension %s\n", - library_name); - result = false; - goto finish; - } - continue; - } else { + if (kext_is_dependency) { dgraph_entry = dgraph_find_dependent(dgraph, dependent_name); if (!dgraph_entry) { IOLog("internal error with dependency graph\n"); @@ -652,8 +646,8 @@ bool add_dependencies_for_kmod(const char * kmod_name, dgraph_t * dgraph) /* Now put the library's dependencies onto the pending set. */ - if (!figureDependenciesForKext(kextPlist, pendingDependencies, - NULL, false)) { + if (!addDependenciesForKext(kextPlist, dependencyList, + kext_is_dependency ? NULL : dependentName, !kext_is_dependency)) { IOLog("can't determine immediate dependencies for extension %s\n", library_name); @@ -662,23 +656,12 @@ bool add_dependencies_for_kmod(const char * kmod_name, dgraph_t * dgraph) } } - dependencyIterator->release(); - dependencyIterator = 0; - - workingDependencies->flushCollection(); - swapDict = workingDependencies; - workingDependencies = pendingDependencies; - pendingDependencies = swapDict; - graph_depth++; - } - finish: if (code && code_is_kmem) { kmem_free(kernel_map, (unsigned int)code, code_length); } - if (workingDependencies) workingDependencies->release(); - if (pendingDependencies) pendingDependencies->release(); - if (dependencyIterator) dependencyIterator->release(); + if (dependencyList) dependencyList->release(); + return result; } @@ -706,7 +689,7 @@ kern_return_t load_kernel_extension(char * kmod_name) /* See if the kmod is already loaded. 
*/ if ((kmod_info = kmod_lookupbyname_locked(kmod_name))) { - kfree((vm_offset_t) kmod_info, sizeof(kmod_info_t)); + kfree(kmod_info, sizeof(kmod_info_t)); return KERN_SUCCESS; } diff --git a/libsa/kld_patch.c b/libsa/kld_patch.c index d73f1b2d0..18892a019 100644 --- a/libsa/kld_patch.c +++ b/libsa/kld_patch.c @@ -33,6 +33,7 @@ #include #if !KERNEL #include +#include #endif #if KERNEL @@ -626,13 +627,13 @@ kld_macho_swap(struct mach_header * mh) cmd < ncmds; cmd++, seg = (struct segment_command *)(((vm_offset_t)seg) + seg->cmdsize)) { - if (NXSwapLong(LC_SYMTAB) == seg->cmd) { + if (OSSwapConstInt32(LC_SYMTAB) == seg->cmd) { swap_symtab_command((struct symtab_command *) seg, hostOrder); swap_nlist((struct nlist *) (((vm_offset_t) mh) + ((struct symtab_command *) seg)->symoff), ((struct symtab_command *) seg)->nsyms, hostOrder); continue; } - if (NXSwapLong(LC_SEGMENT) != seg->cmd) { + if (OSSwapConstInt32(LC_SEGMENT) != seg->cmd) { swap_load_command((struct load_command *) seg, hostOrder); continue; } @@ -746,21 +747,21 @@ static Boolean findBestArch(struct fileRecord *file, const char *pathName) unsigned long i; struct fat_arch *arch; - fat->nfat_arch = NXSwapBigLongToHost(fat->nfat_arch); + fat->nfat_arch = OSSwapBigToHostInt32(fat->nfat_arch); return_if(file->fMapSize < sizeof(struct fat_header) + fat->nfat_arch * sizeof(struct fat_arch), false, ("%s is too fat\n", file->fPath)); arch = (struct fat_arch *) &fat[1]; for (i = 0; i < fat->nfat_arch; i++) { - arch[i].cputype = NXSwapBigLongToHost(arch[i].cputype); - arch[i].cpusubtype = NXSwapBigLongToHost(arch[i].cpusubtype); - arch[i].offset = NXSwapBigLongToHost(arch[i].offset); - arch[i].size = NXSwapBigLongToHost(arch[i].size); - arch[i].align = NXSwapBigLongToHost(arch[i].align); + arch[i].cputype = OSSwapBigToHostInt32(arch[i].cputype); + arch[i].cpusubtype = OSSwapBigToHostInt32(arch[i].cpusubtype); + arch[i].offset = OSSwapBigToHostInt32(arch[i].offset); + arch[i].size = OSSwapBigToHostInt32(arch[i].size); 
+ arch[i].align = OSSwapBigToHostInt32(arch[i].align); } - magic = NXSwapBigLongToHost(fat->magic); + magic = OSSwapBigToHostInt32(fat->magic); } // Now see if we can find any valid architectures @@ -1297,7 +1298,7 @@ relocateSection(const struct fileRecord *file, struct sectionRecord *sectionRec) void * addr = *entry; #if !KERNEL if (file->fSwapped) - addr = (void *) NXSwapLong((long) addr); + addr = (void *) OSSwapInt32((uint32_t) addr); #endif symbol = findSymbolByAddress(file, addr); } @@ -1328,7 +1329,7 @@ findSymbolRefAtLocation(const struct fileRecord *file, void * addr = *loc; #if !KERNEL if (file->fSwapped) - addr = (void *) NXSwapLong((long) addr); + addr = (void *) OSSwapInt32((uint32_t) addr); #endif result = findSymbolByAddress(file, addr); if (!result) @@ -1728,7 +1729,7 @@ static Boolean resolveKernelVTable(struct metaClassRecord *metaClass) void * addr = *curEntry; #if !KERNEL if (file->fSwapped) - addr = (void *) NXSwapLong((long) addr); + addr = (void *) OSSwapInt32((uint32_t) addr); #endif curPatch->fSymbol = (struct nlist *) findSymbolByAddress(file, addr); diff --git a/makedefs/MakeInc.cmd b/makedefs/MakeInc.cmd index b9aa84920..52ee80340 100644 --- a/makedefs/MakeInc.cmd +++ b/makedefs/MakeInc.cmd @@ -7,7 +7,7 @@ MD= /usr/bin/md RM = /bin/rm -f CP = /bin/cp -LN = /bin/ln -s +LN = /bin/ln -fs CAT = /bin/cat MKDIR = /bin/mkdir -p diff --git a/makedefs/MakeInc.def b/makedefs/MakeInc.def index 73842031c..937a465c3 100644 --- a/makedefs/MakeInc.def +++ b/makedefs/MakeInc.def @@ -21,7 +21,7 @@ endif # ifndef COMPONENT_LIST export COMPONENT_LIST = osfmk bsd iokit pexpert libkern libsa -export COMPONENT_LIST_UC := $(shell echo -n $(COMPONENT_LIST) | $(TR) a-z A-Z) +export COMPONENT_LIST_UC := $(shell printf "%s" "$(COMPONENT_LIST)" | $(TR) a-z A-Z) endif ifndef COMPONENT export COMPONENT := $(firstword $(subst /, ,$(shell $(RELPATH) $(SRCROOT) $(SOURCE)))) @@ -39,14 +39,14 @@ endif # ifndef ARCH_CONFIGS ifdef RC_ARCHS -export ARCH_CONFIGS := $(shell 
echo -n $(RC_ARCHS) | $(TR) a-z A-Z) +export ARCH_CONFIGS := $(shell printf "%s" "$(RC_ARCHS)" | $(TR) a-z A-Z) else export ARCH_CONFIGS := $(shell arch | $(TR) a-z A-Z) endif endif ifdef ARCH_CONFIG ifndef ARCH_CONFIG_LC -export ARCH_CONFIG_LC := $(shell echo -n $(ARCH_CONFIG) | $(TR) A-Z a-z) +export ARCH_CONFIG_LC := $(shell printf "%s" "$(ARCH_CONFIG)" | $(TR) A-Z a-z) endif endif @@ -54,10 +54,19 @@ endif # Kernel Configuration options # # supported configurations : RELEASE DEBUG PROFILE -# +# By default, make wll build RELEASE, otherwise the value of KERNEL_CONFIG +# will be used as kernel configuration. If KERNEL_CONFIGS (plural) is set +# it will override KERNEL_CONFIG. Make sure to set KERNEL_CONFIGS because +# build_all rule loops over it when building. + ifndef KERNEL_CONFIGS +ifndef KERNEL_CONFIG export KERNEL_CONFIGS = RELEASE +else +export KERNEL_CONFIGS = $(KERNEL_CONFIG) endif +endif + ifndef KERNEL_CONFIG export KERNEL_CONFIG = $(firstword $(KERNEL_CONFIGS)) endif @@ -74,7 +83,7 @@ export INSTALL_ARCHS = $(ARCH_CONFIGS) else export INSTALL_ARCHS = $(ARCH_CONFIGS) endif -export INSTALL_ARCHS_LC := $(shell echo -n $(ARCH_CONFIGS) | $(TR) A-Z a-z) +export INSTALL_ARCHS_LC := $(shell printf "%s" "$(ARCH_CONFIGS)" | $(TR) A-Z a-z) endif export INSTALL_ARCH_DEFAULT = PPC @@ -87,9 +96,8 @@ export DEFINES = -DAPPLE -DNeXT -DKERNEL -DKERNEL_PRIVATE -DXNU_KERNEL_PRIVATE - # # Compiler command # -KCC = /usr/bin/cc -KC++ = /usr/bin/c++ -CC = $(KCC) +KCC := $(CC) +KC++ := $(CXX) # # Compiler warning flags @@ -112,10 +120,9 @@ MWARNFLAGS_STD = \ export MWARNFLAGS ?= $(MWARNFLAGS_STD) CXXWARNFLAGS_STD = \ - -Wall -Wno-format-y2k -W -Wstrict-prototypes -Wmissing-prototypes \ + -Wall -Wno-format-y2k -W \ -Wpointer-arith -Wreturn-type -Wcast-qual -Wwrite-strings -Wswitch \ - -Wshadow -Wcast-align -Wchar-subscripts -Winline -Wredundant-decls \ - -fpermissive + -Wshadow -Wcast-align -Wchar-subscripts -Winline -Wredundant-decls export CXXWARNFLAGS ?= 
$(CXXWARNFLAGS_STD) @@ -138,15 +145,14 @@ endif export CFLAGS_GEN = -static -g -nostdinc -nostdlib -no-cpp-precomp \ -fno-builtin -finline -fno-keep-inline-functions -msoft-float \ - -fsigned-bitfields $(OTHER_CFLAGS) + -fsigned-bitfields $(OTHER_CFLAGS) -force_cpusubtype_ALL export CFLAGS_RELEASE = export CFLAGS_DEBUG = export CFLAGS_PROFILE = -pg export CFLAGS_PPC = -arch ppc -Dppc -DPPC -D__PPC__ -DPAGE_SIZE_FIXED -export CFLAGS_I386 = -arch i386 -Di386 -DI386 -D__I386__ -DPAGE_SIZE_FIXED \ - -march=i686 -mpreferred-stack-boundary=2 -falign-functions=4 -mcpu=pentium4 -force_cpusubtype_ALL +export CFLAGS_I386 = -arch i386 -Di386 -DI386 -D__I386__ -DPAGE_SIZE_FIXED export CFLAGS_RELEASEPPC = -O2 -mcpu=750 -mmultiple -fschedule-insns export CFLAGS_RELEASE_TRACEPPC = -O2 -mcpu=750 -mmultiple -fschedule-insns @@ -162,9 +168,11 @@ export CFLAGS = $(CFLAGS_GEN) \ $($(addsuffix $(ARCH_CONFIG), $(addsuffix $(KERNEL_CONFIG),CFLAGS_))) \ $(DEFINES) +export MIGCC = $(CC) + # Default C++ flags # -CXXFLAGS_GEN = -fno-rtti -fno-exceptions -fcheck-new -fapple-kext -fpermissive +CXXFLAGS_GEN = -fno-rtti -fno-exceptions -fcheck-new -fapple-kext CXXFLAGS = $(CXXFLAGS_GEN) \ $($(addsuffix $(ARCH_CONFIG),CXXFLAGS_)) \ @@ -173,8 +181,8 @@ CXXFLAGS = $(CXXFLAGS_GEN) \ # # Assembler command # -AS = /usr/bin/cc -S_KCC = /usr/bin/cc +AS = $(CC) +S_KCC = $(CC) # # Default SFLAGS @@ -196,7 +204,7 @@ export SFLAGS = $(SFLAGS_GEN) \ # # Linker command # -LD = /usr/bin/ld +LD = $(KC++) -nostdlib # # Default LDFLAGS @@ -216,15 +224,16 @@ export LDFLAGS_COMPONENT = $(LDFLAGS_COMPONENT_GEN) \ export LDFLAGS_KERNEL_GEN = \ -static \ + -fapple-kext \ -force_cpusubtype_ALL \ - -e __start \ - -segalign 0x1000 \ - -sectalign __TEXT __text 0x1000 \ - -sectalign __DATA __common 0x1000 \ - -sectalign __DATA __bss 0x1000 \ - -sectcreate __PRELINK __text /dev/null \ - -sectcreate __PRELINK __symtab /dev/null \ - -sectcreate __PRELINK __info /dev/null + -Wl,-e,__start \ + -Wl,-segalign,0x1000 \ + 
-Wl,-sectalign,__TEXT,__text,0x1000 \ + -Wl,-sectalign,__DATA,__common,0x1000 \ + -Wl,-sectalign,__DATA,__bss,0x1000 \ + -Wl,-sectcreate,__PRELINK,__text,/dev/null \ + -Wl,-sectcreate,__PRELINK,__symtab,/dev/null \ + -Wl,-sectcreate,__PRELINK,__info,/dev/null export LDFLAGS_KERNEL_RELEASE = # -noseglinkedit @@ -233,14 +242,14 @@ export LDFLAGS_KERNEL_PROFILE = export LDFLAGS_KERNEL_PPC = \ -arch ppc \ - -segaddr __VECTORS 0x0 \ - -segaddr __HIB 0x7000 \ - -segaddr __TEXT 0xe000 + -Wl,-segaddr,__VECTORS,0x0 \ + -Wl,-segaddr,__HIB,0x7000 \ + -Wl,-segaddr,__TEXT,0xe000 export LDFLAGS_KERNEL_I386 = \ -arch i386 \ - -segaddr __HIB 0xC0100000 \ - -segaddr __TEXT 0xC0111000 + -Wl,-segaddr,__HIB,0x100000 \ + -Wl,-segaddr,__TEXT,0x111000 export LDFLAGS_KERNEL = $(LDFLAGS_KERNEL_GEN) \ $($(addsuffix $(ARCH_CONFIG),LDFLAGS_KERNEL_)) \ diff --git a/makedefs/MakeInc.dir b/makedefs/MakeInc.dir index fd5b753b6..06f191b1f 100644 --- a/makedefs/MakeInc.dir +++ b/makedefs/MakeInc.dir @@ -384,24 +384,22 @@ clean: # cscope.files: @echo "Building file list for cscope and tags" - @find . -name '*.h' -type f > _cscope.files 2> /dev/null - @echo bsd/ufs/ufs/ufs_readwrite.c >> _cscope.files 2> /dev/null - @for i in `echo ${ALL_SUBDIRS}`; \ - do \ - cat ${SRCROOT}/$${i}/conf/files ${SRCROOT}/$${i}/conf/files.ppc; \ - cat ${SRCROOT}/$${i}/conf/files.i386; \ - done | \ - sed -e '/^#/d' -e '/^[ ]*$$/d' -e '/^OPTIONS\//d' | \ - sed -e '1,$$s/^\([^ ]*\)[ ].*$$/\1/' >> _cscope.files 2> /dev/null - @sort < _cscope.files > cscope.files 2> /dev/null - @rm -f _cscope.files 2> /dev/null + @find . -name '*.h' -type f | grep -v ^..BUILD > _cscope.files 2> /dev/null + @find . -name '*.defs' -type f | grep -v ^..BUILD >> _cscope.files 2> /dev/null + @find . -name '*.c' -type f | grep -v ^..BUILD >> _cscope.files 2> /dev/null + @find . -name '*.cpp' -type f | grep -v ^..BUILD >> _cscope.files 2> /dev/null + @find . -name '*.s' -type f | grep -v ^..BUILD >> _cscope.files 2> /dev/null + @find . 
-name '*.h.template' -type f | grep -v ^..BUILD >> _cscope.files 2> /dev/null + @echo -k -q -c > cscope.files 2> /dev/null + @sort -u < _cscope.files >> cscope.files 2> /dev/null + @rm -f _cscope.files _cscope.files2 2> /dev/null # # Build cscope database # cscope: cscope.files @echo "Building cscope database" - @cscope -b 2> /dev/null + @cscope -bvU 2> /dev/null # # Build tags diff --git a/makedefs/MakeInc.rule b/makedefs/MakeInc.rule index bfd896d29..d24d1fb75 100644 --- a/makedefs/MakeInc.rule +++ b/makedefs/MakeInc.rule @@ -604,7 +604,7 @@ do_build_mach_kernel: $(OBJROOT)/$(KERNEL_CONFIG)_$(ARCH_CONFIG)/kgmacros @install $(DATA_INSTALL_FLAGS) $(SRCROOT)/config/version.c $(OBJROOT)/$(KERNEL_CONFIG)_$(ARCH_CONFIG)/version.c; @$(SRCROOT)/config/newvers.pl $(OBJROOT)/$(KERNEL_CONFIG)_$(ARCH_CONFIG)/version.c; ${KCC} -c ${filter-out ${${join $@,_CFLAGS_RM}}, ${CFLAGS}} ${${join $@,_CFLAGS_ADD}} ${INCFLAGS} ${${join $@,_INCFLAGS}} $(OBJROOT)/$(KERNEL_CONFIG)_$(ARCH_CONFIG)/version.c -o $(OBJROOT)/$(KERNEL_CONFIG)_$(ARCH_CONFIG)/version.o - $(LD) $(LDFLAGS_KERNEL) $(addprefix $(TARGET)/,$(foreach component,$(COMPONENT_LIST), $(addprefix $(component)/$(firstword $($(addsuffix _KERNEL_CONFIG, $(shell echo -n $(component) | tr a-z A-Z))) $(KERNEL_CONFIG))/, $(addsuffix .o, $(component))))) $(OBJROOT)/$(KERNEL_CONFIG)_$(ARCH_CONFIG)/version.o -o $(TARGET)/mach_kernel.sys $(LD_KERNEL_LIBS); \ + $(LD) $(LDFLAGS_KERNEL) $(addprefix $(TARGET)/,$(foreach component,$(COMPONENT_LIST), $(addprefix $(component)/$(firstword $($(addsuffix _KERNEL_CONFIG, $(shell printf "%s" "$(component)" | tr a-z A-Z))) $(KERNEL_CONFIG))/, $(addsuffix .o, $(component))))) $(OBJROOT)/$(KERNEL_CONFIG)_$(ARCH_CONFIG)/version.o -o $(TARGET)/mach_kernel.sys $(LD_KERNEL_LIBS); \ $(STRIP) $(STRIP_FLAGS) $(TARGET)/mach_kernel.sys -o $(TARGET)/mach_kernel; $(OBJROOT)/$(KERNEL_CONFIG)_$(ARCH_CONFIG)/kgmacros: $(SRCROOT)/kgmacros diff --git a/osfmk/chud/chud_cpu.c b/osfmk/chud/chud_cpu.c new file mode 100644 
index 000000000..d8e0ef4a0 --- /dev/null +++ b/osfmk/chud/chud_cpu.c @@ -0,0 +1,128 @@ +/* + * Copyright (c) 2003-2004 Apple Computer, Inc. All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * The contents of this file constitute Original Code as defined in and + * are subject to the Apple Public Source License Version 1.1 (the + * "License"). You may not use this file except in compliance with the + * License. Please obtain a copy of the License at + * http://www.apple.com/publicsource and read it before using this file. + * + * This Original Code and all software distributed under the License are + * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the + * License for the specific language governing rights and limitations + * under the License. + * + * @APPLE_LICENSE_HEADER_END@ + */ +#include +#include + +#include +#include +#include + +#include + +#include + +#pragma mark **** cpu count **** + +__private_extern__ int +chudxnu_avail_cpu_count(void) +{ + host_basic_info_data_t hinfo; + kern_return_t kr; + mach_msg_type_number_t count = HOST_BASIC_INFO_COUNT; + + kr = host_info(host_self(), HOST_BASIC_INFO, (integer_t *)&hinfo, &count); + if(kr == KERN_SUCCESS) { + return hinfo.avail_cpus; + } else { + return 0; + } +} + +__private_extern__ int +chudxnu_phys_cpu_count(void) +{ + host_basic_info_data_t hinfo; + kern_return_t kr; + mach_msg_type_number_t count = HOST_BASIC_INFO_COUNT; + + kr = host_info(host_self(), HOST_BASIC_INFO, (integer_t *)&hinfo, &count); + if(kr == KERN_SUCCESS) { + return hinfo.max_cpus; + } else { + return 0; + } +} + +__private_extern__ +int chudxnu_cpu_number(void) +{ + return cpu_number(); +} + +#pragma mark **** branch trace buffer **** + +extern int pc_trace_buf[1024]; + +__private_extern__ 
uint32_t * +chudxnu_get_branch_trace_buffer(uint32_t *entries) +{ + if(entries) { + *entries = sizeof(pc_trace_buf)/sizeof(int); + } + return pc_trace_buf; +} + +#pragma mark **** interrupts enable/disable **** + +__private_extern__ boolean_t +chudxnu_get_interrupts_enabled(void) +{ + return ml_get_interrupts_enabled(); +} + +__private_extern__ boolean_t +chudxnu_set_interrupts_enabled(boolean_t enable) +{ + return ml_set_interrupts_enabled(enable); +} + +__private_extern__ boolean_t +chudxnu_at_interrupt_context(void) +{ + return ml_at_interrupt_context(); +} + +__private_extern__ void +chudxnu_cause_interrupt(void) +{ + ml_cause_interrupt(); +} + +#pragma mark **** preemption enable/disable **** + +__private_extern__ void +chudxnu_enable_preemption(void) +{ + enable_preemption(); +} + +__private_extern__ void +chudxnu_disable_preemption(void) +{ + disable_preemption(); +} + +__private_extern__ int +chudxnu_get_preemption_level(void) +{ + return get_preemption_level(); +} diff --git a/osfmk/ppc/chud/chud_glue.c b/osfmk/chud/chud_glue.c similarity index 100% rename from osfmk/ppc/chud/chud_glue.c rename to osfmk/chud/chud_glue.c diff --git a/osfmk/ppc/chud/chud_memory.c b/osfmk/chud/chud_memory.c similarity index 75% rename from osfmk/ppc/chud/chud_memory.c rename to osfmk/chud/chud_memory.c index 5529fe8b1..9eeabb69e 100644 --- a/osfmk/ppc/chud/chud_memory.c +++ b/osfmk/chud/chud_memory.c @@ -21,13 +21,16 @@ */ #include -#include -#include +#include +#include + +extern unsigned int IODefaultCacheBits(addr64_t pa); +extern unsigned int vm_page_free_count; __private_extern__ uint64_t chudxnu_avail_memory_size(void) { - return mem_size; + return max_mem; } __private_extern__ @@ -36,6 +39,16 @@ uint64_t chudxnu_phys_memory_size(void) return mem_actual; } +/* + * This function is not intended to be valid for any amount of time, + * it is just an instantaneous snapshot of the current free memory size. 
+ */ +__private_extern__ +uint64_t chudxnu_free_memory_size(void) +{ + return (uint64_t)vm_page_free_count * (uint64_t)page_size; +} + __private_extern__ vm_offset_t chudxnu_io_map(uint64_t phys_addr, vm_size_t size) { diff --git a/osfmk/chud/chud_osfmk_callback.c b/osfmk/chud/chud_osfmk_callback.c new file mode 100644 index 000000000..7dc865a5a --- /dev/null +++ b/osfmk/chud/chud_osfmk_callback.c @@ -0,0 +1,133 @@ +/* + * Copyright (c) 2003-2004 Apple Computer, Inc. All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * The contents of this file constitute Original Code as defined in and + * are subject to the Apple Public Source License Version 1.1 (the + * "License"). You may not use this file except in compliance with the + * License. Please obtain a copy of the License at + * http://www.apple.com/publicsource and read it before using this file. + * + * This Original Code and all software distributed under the License are + * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the + * License for the specific language governing rights and limitations + * under the License. 
+ * + * @APPLE_LICENSE_HEADER_END@ + */ + +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include +#include + +#include +#include + +#pragma mark **** timer **** +__private_extern__ chud_timer_t +chudxnu_timer_alloc(chudxnu_timer_callback_func_t func, uint32_t param0) +{ + return (chud_timer_t)thread_call_allocate((thread_call_func_t)func, (thread_call_param_t)param0); +} + +__private_extern__ kern_return_t +chudxnu_timer_callback_enter( + chud_timer_t timer, + uint32_t param1, + uint32_t time, + uint32_t units) +{ + uint64_t t_delay; + clock_interval_to_deadline(time, units, &t_delay); + thread_call_enter1_delayed((thread_call_t)timer, (thread_call_param_t)param1, t_delay); + return KERN_SUCCESS; +} + +__private_extern__ kern_return_t +chudxnu_timer_callback_cancel(chud_timer_t timer) +{ + thread_call_cancel((thread_call_t)timer); + return KERN_SUCCESS; +} + +__private_extern__ kern_return_t +chudxnu_timer_free(chud_timer_t timer) +{ + thread_call_cancel((thread_call_t)timer); + thread_call_free((thread_call_t)timer); + return KERN_SUCCESS; +} + +#pragma mark **** thread timer - DEPRECATED **** + +static thread_call_t thread_timer_call = NULL; +static chudxnu_thread_timer_callback_func_t thread_timer_callback_fn = NULL; + +static void chudxnu_private_thread_timer_callback( + thread_call_param_t param0, + thread_call_param_t param1) +{ +#pragma unused (param1) + chudxnu_thread_timer_callback_func_t fn = thread_timer_callback_fn; + + if(thread_timer_call) { + thread_call_free(thread_timer_call); + thread_timer_call = NULL; + + if(fn) { + (fn)((uint32_t)param0); + } + } +} + +// DEPRECATED +__private_extern__ +kern_return_t chudxnu_thread_timer_callback_enter( + chudxnu_thread_timer_callback_func_t func, + uint32_t param, + uint32_t time, + uint32_t units) +{ + if(!thread_timer_call) { + uint64_t t_delay; + thread_timer_callback_fn = func; + + thread_timer_call = thread_call_allocate( + (thread_call_func_t) + 
chudxnu_private_thread_timer_callback, + (thread_call_param_t) + param); + clock_interval_to_deadline(time, units, &t_delay); + thread_call_enter_delayed(thread_timer_call, t_delay); + return KERN_SUCCESS; + } else { + return KERN_FAILURE; // thread timer call already pending + } +} + +// DEPRECATED +__private_extern__ +kern_return_t chudxnu_thread_timer_callback_cancel(void) +{ + if(thread_timer_call) { + thread_call_cancel(thread_timer_call); + thread_call_free(thread_timer_call); + thread_timer_call = NULL; + } + thread_timer_callback_fn = NULL; + return KERN_SUCCESS; +} diff --git a/osfmk/chud/chud_thread.c b/osfmk/chud/chud_thread.c new file mode 100644 index 000000000..bfaf88fd7 --- /dev/null +++ b/osfmk/chud/chud_thread.c @@ -0,0 +1,442 @@ +/* + * Copyright (c) 2003-2004 Apple Computer, Inc. All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * The contents of this file constitute Original Code as defined in and + * are subject to the Apple Public Source License Version 1.1 (the + * "License"). You may not use this file except in compliance with the + * License. Please obtain a copy of the License at + * http://www.apple.com/publicsource and read it before using this file. + * + * This Original Code and all software distributed under the License are + * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the + * License for the specific language governing rights and limitations + * under the License. 
+ * + * @APPLE_LICENSE_HEADER_END@ + */ + +#include +#include +#include + +#include +#include +#include +#include + +#include +#include + +#include + +// include the correct file to find real_ncpus +#if defined(__i386__) || defined(__x86_64__) +# include +#endif // i386 or x86_64 + +#if defined(__ppc__) || defined(__ppc64__) +# include +#endif // ppc or ppc64 + +#pragma mark **** thread binding **** + +__private_extern__ kern_return_t +chudxnu_bind_thread(thread_t thread, int cpu) +{ + processor_t proc = NULL; + + if(cpu >= real_ncpus) // sanity check + return KERN_FAILURE; + + proc = cpu_to_processor(cpu); + + if(proc && !(proc->state == PROCESSOR_OFF_LINE) && + !(proc->state == PROCESSOR_SHUTDOWN)) { + /* disallow bind to shutdown processor */ + thread_bind(thread, proc); + if(thread==current_thread()) { + (void)thread_block(THREAD_CONTINUE_NULL); + } + return KERN_SUCCESS; + } + return KERN_FAILURE; +} + +__private_extern__ kern_return_t +chudxnu_unbind_thread(thread_t thread) +{ + thread_bind(thread, PROCESSOR_NULL); + return KERN_SUCCESS; +} + +#pragma mark **** task and thread info **** + +__private_extern__ +boolean_t chudxnu_is_64bit_task(task_t task) +{ + return (task_has_64BitAddr(task)); +} + +#define THING_TASK 0 +#define THING_THREAD 1 + +// an exact copy of processor_set_things() except no mig conversion at the end! +static kern_return_t +chudxnu_private_processor_set_things( + processor_set_t pset, + mach_port_t **thing_list, + mach_msg_type_number_t *count, + int type) +{ + unsigned int actual; /* this many things */ + unsigned int maxthings; + unsigned int i; + + vm_size_t size, size_needed; + void *addr; + + if (pset == PROCESSOR_SET_NULL) + return (KERN_INVALID_ARGUMENT); + + size = 0; addr = 0; + + for (;;) { + pset_lock(pset); + if (!pset->active) { + pset_unlock(pset); + + return (KERN_FAILURE); + } + + if (type == THING_TASK) + maxthings = pset->task_count; + else + maxthings = pset->thread_count; + + /* do we have the memory we need? 
*/ + + size_needed = maxthings * sizeof (mach_port_t); + if (size_needed <= size) + break; + + /* unlock the pset and allocate more memory */ + pset_unlock(pset); + + if (size != 0) + kfree(addr, size); + + assert(size_needed > 0); + size = size_needed; + + addr = kalloc(size); + if (addr == 0) + return (KERN_RESOURCE_SHORTAGE); + } + + /* OK, have memory and the processor_set is locked & active */ + + actual = 0; + switch (type) { + + case THING_TASK: + { + task_t task, *tasks = (task_t *)addr; + + for (task = (task_t)queue_first(&pset->tasks); + !queue_end(&pset->tasks, (queue_entry_t)task); + task = (task_t)queue_next(&task->pset_tasks)) { + task_reference_internal(task); + tasks[actual++] = task; + } + + break; + } + + case THING_THREAD: + { + thread_t thread, *threads = (thread_t *)addr; + + for (i = 0, thread = (thread_t)queue_first(&pset->threads); + !queue_end(&pset->threads, (queue_entry_t)thread); + thread = (thread_t)queue_next(&thread->pset_threads)) { + thread_reference_internal(thread); + threads[actual++] = thread; + } + + break; + } + } + + pset_unlock(pset); + + if (actual < maxthings) + size_needed = actual * sizeof (mach_port_t); + + if (actual == 0) { + /* no things, so return null pointer and deallocate memory */ + *thing_list = 0; + *count = 0; + + if (size != 0) + kfree(addr, size); + } + else { + /* if we allocated too much, must copy */ + + if (size_needed < size) { + void *newaddr; + + newaddr = kalloc(size_needed); + if (newaddr == 0) { + switch (type) { + + case THING_TASK: + { + task_t *tasks = (task_t *)addr; + + for (i = 0; i < actual; i++) + task_deallocate(tasks[i]); + break; + } + + case THING_THREAD: + { + thread_t *threads = (thread_t *)addr; + + for (i = 0; i < actual; i++) + thread_deallocate(threads[i]); + break; + } + } + + kfree(addr, size); + return (KERN_RESOURCE_SHORTAGE); + } + + bcopy((void *) addr, (void *) newaddr, size_needed); + kfree(addr, size); + addr = newaddr; + } + + *thing_list = (mach_port_t *)addr; + *count 
= actual; + } + + return (KERN_SUCCESS); +} + +// an exact copy of task_threads() except no mig conversion at the end! +static kern_return_t +chudxnu_private_task_threads( + task_t task, + thread_act_array_t *threads_out, + mach_msg_type_number_t *count) +{ + mach_msg_type_number_t actual; + thread_t *threads; + thread_t thread; + vm_size_t size, size_needed; + void *addr; + unsigned int i, j; + + if (task == TASK_NULL) + return (KERN_INVALID_ARGUMENT); + + size = 0; addr = 0; + + for (;;) { + task_lock(task); + if (!task->active) { + task_unlock(task); + + if (size != 0) + kfree(addr, size); + + return (KERN_FAILURE); + } + + actual = task->thread_count; + + /* do we have the memory we need? */ + size_needed = actual * sizeof (mach_port_t); + if (size_needed <= size) + break; + + /* unlock the task and allocate more memory */ + task_unlock(task); + + if (size != 0) + kfree(addr, size); + + assert(size_needed > 0); + size = size_needed; + + addr = kalloc(size); + if (addr == 0) + return (KERN_RESOURCE_SHORTAGE); + } + + /* OK, have memory and the task is locked & active */ + threads = (thread_t *)addr; + + i = j = 0; + + for (thread = (thread_t)queue_first(&task->threads); i < actual; + ++i, thread = (thread_t)queue_next(&thread->task_threads)) { + thread_reference_internal(thread); + threads[j++] = thread; + } + + assert(queue_end(&task->threads, (queue_entry_t)thread)); + + actual = j; + size_needed = actual * sizeof (mach_port_t); + + /* can unlock task now that we've got the thread refs */ + task_unlock(task); + + if (actual == 0) { + /* no threads, so return null pointer and deallocate memory */ + + *threads_out = 0; + *count = 0; + + if (size != 0) + kfree(addr, size); + } + else { + /* if we allocated too much, must copy */ + + if (size_needed < size) { + void *newaddr; + + newaddr = kalloc(size_needed); + if (newaddr == 0) { + for (i = 0; i < actual; ++i) + thread_deallocate(threads[i]); + kfree(addr, size); + return (KERN_RESOURCE_SHORTAGE); + } + + 
bcopy(addr, newaddr, size_needed); + kfree(addr, size); + threads = (thread_t *)newaddr; + } + + *threads_out = threads; + *count = actual; + } + + return (KERN_SUCCESS); +} + + +__private_extern__ kern_return_t +chudxnu_all_tasks( + task_array_t *task_list, + mach_msg_type_number_t *count) +{ + return chudxnu_private_processor_set_things(&default_pset, (mach_port_t **)task_list, count, THING_TASK); +} + +__private_extern__ kern_return_t +chudxnu_free_task_list( + task_array_t *task_list, + mach_msg_type_number_t *count) +{ + vm_size_t size = (*count)*sizeof(mach_port_t); + void *addr = *task_list; + + if(addr) { + int i, maxCount = *count; + for(i=0; ilast_switch; + return KERN_SUCCESS; +} + diff --git a/osfmk/chud/chud_xnu.h b/osfmk/chud/chud_xnu.h new file mode 100644 index 000000000..992aa65f3 --- /dev/null +++ b/osfmk/chud/chud_xnu.h @@ -0,0 +1,257 @@ +/* + * Copyright (c) 2003 Apple Computer, Inc. All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * The contents of this file constitute Original Code as defined in and + * are subject to the Apple Public Source License Version 1.1 (the + * "License"). You may not use this file except in compliance with the + * License. Please obtain a copy of the License at + * http://www.apple.com/publicsource and read it before using this file. + * + * This Original Code and all software distributed under the License are + * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the + * License for the specific language governing rights and limitations + * under the License. 
+ * + * @APPLE_LICENSE_HEADER_END@ + */ + +#ifndef _CHUD_XNU_H_ +#define _CHUD_XNU_H_ + + +#include +#include +#include + +#pragma mark **** version **** +extern uint32_t chudxnu_version(void); + +#pragma mark **** task **** +// ******************************************************************************** +// task +// ******************************************************************************** +extern int chudxnu_pid_for_task(task_t task); +extern task_t chudxnu_task_for_pid(int pid); +extern int chudxnu_current_pid(void); + +extern kern_return_t chudxnu_task_read(task_t task, void *kernaddr, uint64_t usraddr, vm_size_t size); +extern kern_return_t chudxnu_task_write(task_t task, uint64_t useraddr, void *kernaddr, vm_size_t size); +extern kern_return_t chudxnu_kern_read(void *destaddr, vm_offset_t srcaddr, vm_size_t size); +extern kern_return_t chudxnu_kern_write(vm_offset_t destaddr, void *srcaddr, vm_size_t size); + +extern boolean_t chudxnu_is_64bit_task(task_t task); + +#pragma mark **** thread **** +// ******************************************************************************** +// thread +// ******************************************************************************** +extern kern_return_t chudxnu_bind_thread(thread_t thread, int cpu); +extern kern_return_t chudxnu_unbind_thread(thread_t thread); + +extern kern_return_t chudxnu_thread_get_state(thread_t thread, thread_flavor_t flavor, thread_state_t tstate, mach_msg_type_number_t *count, boolean_t user_only); +extern kern_return_t chudxnu_thread_set_state(thread_t thread, thread_flavor_t flavor, thread_state_t tstate, mach_msg_type_number_t count, boolean_t user_only); +extern kern_return_t chudxnu_thread_user_state_available(thread_t thread); + +extern kern_return_t chudxnu_thread_get_callstack(thread_t thread, uint32_t *callStack, mach_msg_type_number_t *count, boolean_t user_only); +extern kern_return_t chudxnu_thread_get_callstack64(thread_t thread, uint64_t *callStack, 
mach_msg_type_number_t *count, boolean_t user_only); + +extern task_t chudxnu_current_task(void); +extern thread_t chudxnu_current_thread(void); + +extern task_t chudxnu_task_for_thread(thread_t thread); + +extern kern_return_t chudxnu_all_tasks(task_array_t *task_list, mach_msg_type_number_t *count); +extern kern_return_t chudxnu_free_task_list(task_array_t *task_list, mach_msg_type_number_t *count); + +extern kern_return_t chudxnu_all_threads(thread_array_t *thread_list, mach_msg_type_number_t *count); +extern kern_return_t chudxnu_task_threads(task_t task, thread_array_t *thread_list, mach_msg_type_number_t *count); +extern kern_return_t chudxnu_free_thread_list(thread_array_t *thread_list, mach_msg_type_number_t *count); + +extern kern_return_t chudxnu_thread_info( thread_t thread, thread_flavor_t flavor, thread_info_t thread_info_out, mach_msg_type_number_t *thread_info_count); + +extern kern_return_t chudxnu_thread_last_context_switch(thread_t thread, uint64_t *timestamp); + +#pragma mark **** memory **** +// ******************************************************************************** +// memory +// ******************************************************************************** + +extern uint64_t chudxnu_avail_memory_size(void); +extern uint64_t chudxnu_phys_memory_size(void); +extern uint64_t chudxnu_free_memory_size(void); + +extern vm_offset_t chudxnu_io_map(uint64_t phys_addr, vm_size_t size); + +extern uint32_t chudxnu_phys_addr_wimg(uint64_t phys_addr); + +#pragma mark **** cpu **** +// ******************************************************************************** +// cpu +// ******************************************************************************** +extern int chudxnu_avail_cpu_count(void); +extern int chudxnu_phys_cpu_count(void); +extern int chudxnu_cpu_number(void); + +extern kern_return_t chudxnu_enable_cpu(int cpu, boolean_t enable); + +extern kern_return_t chudxnu_enable_cpu_nap(int cpu, boolean_t enable); +extern boolean_t 
chudxnu_cpu_nap_enabled(int cpu); + +extern boolean_t chudxnu_get_interrupts_enabled(void); +extern boolean_t chudxnu_set_interrupts_enabled(boolean_t enable); +extern boolean_t chudxnu_at_interrupt_context(void); +extern void chudxnu_cause_interrupt(void); + +extern void chudxnu_enable_preemption(void); +extern void chudxnu_disable_preemption(void); +extern int chudxnu_get_preemption_level(void); + +extern kern_return_t chudxnu_set_shadowed_spr(int cpu, int spr, uint32_t val); +extern kern_return_t chudxnu_set_shadowed_spr64(int cpu, int spr, uint64_t val); + +extern uint32_t chudxnu_get_orig_cpu_l2cr(int cpu); +extern uint32_t chudxnu_get_orig_cpu_l3cr(int cpu); + +extern kern_return_t chudxnu_read_spr(int cpu, int spr, uint32_t *val_p); +extern kern_return_t chudxnu_read_spr64(int cpu, int spr, uint64_t *val_p); +extern kern_return_t chudxnu_write_spr(int cpu, int spr, uint32_t val); +extern kern_return_t chudxnu_write_spr64(int cpu, int spr, uint64_t val); + +extern void chudxnu_flush_caches(void); +extern void chudxnu_enable_caches(boolean_t enable); + +extern kern_return_t chudxnu_perfmon_acquire_facility(task_t); +extern kern_return_t chudxnu_perfmon_release_facility(task_t); + +extern uint32_t * chudxnu_get_branch_trace_buffer(uint32_t *entries); + +typedef struct { + uint32_t hwResets; + uint32_t hwMachineChecks; + uint32_t hwDSIs; + uint32_t hwISIs; + uint32_t hwExternals; + uint32_t hwAlignments; + uint32_t hwPrograms; + uint32_t hwFloatPointUnavailable; + uint32_t hwDecrementers; + uint32_t hwIOErrors; + uint32_t hwSystemCalls; + uint32_t hwTraces; + uint32_t hwFloatingPointAssists; + uint32_t hwPerformanceMonitors; + uint32_t hwAltivecs; + uint32_t hwInstBreakpoints; + uint32_t hwSystemManagements; + uint32_t hwAltivecAssists; + uint32_t hwThermal; + uint32_t hwSoftPatches; + uint32_t hwMaintenances; + uint32_t hwInstrumentations; +} rupt_counters_t; + +extern kern_return_t chudxnu_get_cpu_rupt_counters(int cpu, rupt_counters_t *rupts); +extern 
kern_return_t chudxnu_clear_cpu_rupt_counters(int cpu); + +extern kern_return_t chudxnu_passup_alignment_exceptions(boolean_t enable); + +extern kern_return_t chudxnu_scom_read(uint32_t reg, uint64_t *data); +extern kern_return_t chudxnu_scom_write(uint32_t reg, uint64_t data); + +#pragma mark **** callbacks **** +// ******************************************************************************** +// callbacks +// ******************************************************************************** + +extern void chudxnu_cancel_all_callbacks(void); + +// cpu timer - each cpu has its own callback +typedef kern_return_t (*chudxnu_cpu_timer_callback_func_t)(thread_flavor_t flavor, thread_state_t tstate, mach_msg_type_number_t count); +extern kern_return_t chudxnu_cpu_timer_callback_enter(chudxnu_cpu_timer_callback_func_t func, uint32_t time, uint32_t units); // callback is entered on current cpu +extern kern_return_t chudxnu_cpu_timer_callback_cancel(void); // callback is cleared on current cpu +extern kern_return_t chudxnu_cpu_timer_callback_cancel_all(void); // callback is cleared on all cpus + +enum { + PPC_TRAP_PROGRAM = 0x700, + PPC_TRAP_TRACE = 0xD00, + PPC_TRAP_PERFMON = 0xF00, +}; + +enum { + X86_TRAP_DEBUG = 0x1, +}; + +// trap callback - one callback for system +typedef kern_return_t (*chudxnu_trap_callback_func_t)(uint32_t trapentry, thread_flavor_t flavor, thread_state_t tstate, mach_msg_type_number_t count); +extern kern_return_t chudxnu_trap_callback_enter(chudxnu_trap_callback_func_t func); +extern kern_return_t chudxnu_trap_callback_cancel(void); + +enum { + PPC_INTERRUPT_DECREMENTER = 0x900, + PPC_INTERRUPT_INTERRUPT = 0x500, + PPC_INTERRUPT_CPU_SIGNAL = 0x2200, +}; + +enum { + X86_INTERRUPT_PERFMON = 0xB, +}; + +// interrupt callback - one callback for system +typedef kern_return_t (*chudxnu_interrupt_callback_func_t)(uint32_t trapentry, thread_flavor_t flavor, thread_state_t tstate, mach_msg_type_number_t count); +extern kern_return_t 
chudxnu_interrupt_callback_enter(chudxnu_interrupt_callback_func_t func); +extern kern_return_t chudxnu_interrupt_callback_cancel(void); + +// ast callback - one callback for system +typedef kern_return_t (*chudxnu_perfmon_ast_callback_func_t)(thread_flavor_t flavor, thread_state_t tstate, mach_msg_type_number_t count); +extern kern_return_t chudxnu_perfmon_ast_callback_enter(chudxnu_perfmon_ast_callback_func_t func); +extern kern_return_t chudxnu_perfmon_ast_callback_cancel(void); +extern kern_return_t chudxnu_perfmon_ast_send(void); +extern kern_return_t chudxnu_perfmon_ast_send_urgent(boolean_t urgent); + +// cpusig callback - one callback for system +typedef kern_return_t (*chudxnu_cpusig_callback_func_t)(int request, thread_flavor_t flavor, thread_state_t tstate, mach_msg_type_number_t count); +extern kern_return_t chudxnu_cpusig_callback_enter(chudxnu_cpusig_callback_func_t func); +extern kern_return_t chudxnu_cpusig_callback_cancel(void); +extern kern_return_t chudxnu_cpusig_send(int otherCPU, uint32_t request); + +// kdebug callback - one callback for system +typedef kern_return_t (*chudxnu_kdebug_callback_func_t)(uint32_t debugid, uint32_t arg0, uint32_t arg1, uint32_t arg2, uint32_t arg3, uint32_t arg4); +extern kern_return_t chudxnu_kdebug_callback_enter(chudxnu_kdebug_callback_func_t func); +extern kern_return_t chudxnu_kdebug_callback_cancel(void); + +// timer callback - multiple callbacks +typedef kern_return_t (*chudxnu_timer_callback_func_t)(uint32_t param0, uint32_t param1); +typedef void * chud_timer_t; +extern chud_timer_t chudxnu_timer_alloc(chudxnu_timer_callback_func_t func, uint32_t param0); +extern kern_return_t chudxnu_timer_callback_enter(chud_timer_t timer, uint32_t param1, uint32_t time, uint32_t units); +extern kern_return_t chudxnu_timer_callback_cancel(chud_timer_t timer); +extern kern_return_t chudxnu_timer_free(chud_timer_t timer); + +// CHUD systemcall callback - one callback for system +//typedef kern_return_t 
(*chudxnu_syscall_callback_func_t)(thread_flavor_t flavor, thread_state_t tstate, mach_msg_type_number_t count); // v2 +typedef kern_return_t (*chudxnu_syscall_callback_func_t)(uint32_t code, uint32_t arg0, uint32_t arg1, uint32_t arg2, uint32_t arg3, uint32_t arg4); +extern kern_return_t chudxnu_syscall_callback_enter(chudxnu_syscall_callback_func_t func); +extern kern_return_t chudxnu_syscall_callback_cancel(void); + +// ******************************************************************************** +// DEPRECATED +// ******************************************************************************** +extern kern_return_t chudxnu_bind_current_thread(int cpu); + +extern kern_return_t chudxnu_unbind_current_thread(void); + +extern kern_return_t chudxnu_current_thread_get_callstack(uint32_t *callStack, mach_msg_type_number_t *count, boolean_t user_only); + +extern thread_t chudxnu_current_act(void); + +// thread timer callback - one callback for system +typedef kern_return_t (*chudxnu_thread_timer_callback_func_t)(uint32_t param); +extern kern_return_t chudxnu_thread_timer_callback_enter(chudxnu_thread_timer_callback_func_t func, uint32_t param, uint32_t time, uint32_t units); +extern kern_return_t chudxnu_thread_timer_callback_cancel(void); + +#endif /* _CHUD_XNU_H_ */ diff --git a/osfmk/i386/rtclock_entries.h b/osfmk/chud/chud_xnu_glue.h similarity index 57% rename from osfmk/i386/rtclock_entries.h rename to osfmk/chud/chud_xnu_glue.h index 7c2ef27aa..ab912af79 100644 --- a/osfmk/i386/rtclock_entries.h +++ b/osfmk/chud/chud_xnu_glue.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2004 Apple Computer, Inc. All rights reserved. 
* * @APPLE_LICENSE_HEADER_START@ * @@ -19,24 +19,11 @@ * * @APPLE_LICENSE_HEADER_END@ */ -/* - * @OSF_COPYRIGHT@ - */ -extern int rtc_config(void); -extern int rtc_init(void); -extern kern_return_t rtc_gettime( - mach_timespec_t * curtime); -extern void rtc_gettime_interrupts_disabled( - mach_timespec_t * curtime); -extern kern_return_t rtc_settime( - mach_timespec_t * curtime); -extern kern_return_t rtc_getattr( - clock_flavor_t flavor, - clock_attr_t ttr, - mach_msg_type_number_t * count); -extern void rtc_setalrm( - mach_timespec_t * alarmtime); -extern void rtclock_intr( - struct i386_interrupt_state *regs); -extern void rtc_sleep_wakeup(void); +#if defined (__ppc__) +#include "ppc/chud_xnu_glue.h" +#elif defined (__i386__) +#include "i386/chud_xnu_glue.h" +#else +#error architecture not supported +#endif diff --git a/osfmk/chud/chud_xnu_private.h b/osfmk/chud/chud_xnu_private.h new file mode 100644 index 000000000..3028a70d3 --- /dev/null +++ b/osfmk/chud/chud_xnu_private.h @@ -0,0 +1,38 @@ +/* + * Copyright (c) 2003 Apple Computer, Inc. All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * The contents of this file constitute Original Code as defined in and + * are subject to the Apple Public Source License Version 1.1 (the + * "License"). You may not use this file except in compliance with the + * License. Please obtain a copy of the License at + * http://www.apple.com/publicsource and read it before using this file. + * + * This Original Code and all software distributed under the License are + * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the + * License for the specific language governing rights and limitations + * under the License. 
+ * + * @APPLE_LICENSE_HEADER_END@ + */ + +#ifndef _CHUD_XNU_PRIVATE_H_ +#define _CHUD_XNU_PRIVATE_H_ + +#include +#include +#include + +#if defined (__ppc__) +#include "chud/ppc/chud_xnu_private.h" +#elif defined (__i386__) +#include "chud/i386/chud_xnu_private.h" +#else +#error architecture not supported +#endif + +#endif /* _CHUD_XNU_PRIVATE_H_ */ diff --git a/osfmk/i386/AT386/bbclock_entries.h b/osfmk/chud/i386/chud_cpu_asm.h similarity index 73% rename from osfmk/i386/AT386/bbclock_entries.h rename to osfmk/chud/i386/chud_cpu_asm.h index 2cd9cc6c6..ee9308606 100644 --- a/osfmk/i386/AT386/bbclock_entries.h +++ b/osfmk/chud/i386/chud_cpu_asm.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2003 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -19,14 +19,8 @@ * * @APPLE_LICENSE_HEADER_END@ */ -/* - * @OSF_COPYRIGHT@ - */ -extern kern_return_t bbc_gettime( - mach_timespec_t * curtime); -extern kern_return_t bbc_settime( - mach_timespec_t * curtime); -extern int bbc_config(void); +#ifndef _CHUD_CPU_ASM_H_ +#define _CHUD_CPU_ASM_H_ -#define NO_SETALRM (void (*) (mach_timespec_t * alarm_time))0 +#endif // _CHUD_CPU_ASM_H_ diff --git a/osfmk/chud/i386/chud_cpu_asm.s b/osfmk/chud/i386/chud_cpu_asm.s new file mode 100644 index 000000000..838afe22e --- /dev/null +++ b/osfmk/chud/i386/chud_cpu_asm.s @@ -0,0 +1,26 @@ +/* + * Copyright (c) 2003 Apple Computer, Inc. All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * The contents of this file constitute Original Code as defined in and + * are subject to the Apple Public Source License Version 1.1 (the + * "License"). You may not use this file except in compliance with the + * License. Please obtain a copy of the License at + * http://www.apple.com/publicsource and read it before using this file. 
+ * + * This Original Code and all software distributed under the License are + * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the + * License for the specific language governing rights and limitations + * under the License. + * + * @APPLE_LICENSE_HEADER_END@ + */ + +#define ASSEMBLER +#include +#include + diff --git a/osfmk/chud/i386/chud_cpu_i386.c b/osfmk/chud/i386/chud_cpu_i386.c new file mode 100644 index 000000000..12d91f3b4 --- /dev/null +++ b/osfmk/chud/i386/chud_cpu_i386.c @@ -0,0 +1,160 @@ +/* + * Copyright (c) 2003-2004 Apple Computer, Inc. All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * The contents of this file constitute Original Code as defined in and + * are subject to the Apple Public Source License Version 1.1 (the + * "License"). You may not use this file except in compliance with the + * License. Please obtain a copy of the License at + * http://www.apple.com/publicsource and read it before using this file. + * + * This Original Code and all software distributed under the License are + * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the + * License for the specific language governing rights and limitations + * under the License. 
+ * + * @APPLE_LICENSE_HEADER_END@ + */ +#include +#include + +#include +#include + +#include +#include +#include +#include + +#include + +#pragma mark **** cpu enable/disable **** + +extern kern_return_t processor_start(processor_t processor); // osfmk/kern/processor.c +extern kern_return_t processor_exit(processor_t processor); // osfmk/kern/processor.c + +__private_extern__ +kern_return_t chudxnu_enable_cpu(int cpu, boolean_t enable) +{ + chudxnu_unbind_thread(current_thread()); + + if(cpu < 0 || (unsigned int)cpu >= real_ncpus) // sanity check + return KERN_FAILURE; + + if((cpu_data_ptr[cpu] != NULL) && cpu != master_cpu) { + processor_t processor = cpu_to_processor(cpu); + + if(processor == master_processor) // don't mess with the boot processor + return KERN_FAILURE; + + if(enable) { + // make sure it isn't already running + if(processor->state == PROCESSOR_OFF_LINE || + processor->state == PROCESSOR_SHUTDOWN) { + return processor_start(processor); + } + return KERN_SUCCESS; // it's already running + } else { + // make sure it hasn't already exited + if(processor->state != PROCESSOR_OFF_LINE && + processor->state != PROCESSOR_SHUTDOWN) { + return processor_exit(processor); + } + return KERN_SUCCESS; + } + } + return KERN_FAILURE; +} + +#pragma mark **** cache flush **** + +__private_extern__ +void +chudxnu_flush_caches(void) +{ +/* XXX */ +} + +__private_extern__ +void +chudxnu_enable_caches(boolean_t enable) +{ +#pragma unused (enable) +/* XXX */ +} + +#pragma mark **** perfmon facility **** + +__private_extern__ kern_return_t +chudxnu_perfmon_acquire_facility(task_t task) +{ + return pmc_acquire(task); +} + +__private_extern__ kern_return_t +chudxnu_perfmon_release_facility(task_t task) +{ + return pmc_release(task); +} + +#pragma mark **** rupt counters **** + +__private_extern__ kern_return_t +chudxnu_get_cpu_rupt_counters(int cpu, rupt_counters_t *rupts) +{ + if(cpu < 0 || (unsigned int)cpu >= real_ncpus) { // sanity check + return KERN_FAILURE; + } + + 
if(rupts) { + boolean_t oldlevel = ml_set_interrupts_enabled(FALSE); + cpu_data_t *per_proc; + + per_proc = cpu_data_ptr[cpu]; + rupts->hwResets = 0; + rupts->hwMachineChecks = 0; + rupts->hwDSIs = 0; + rupts->hwISIs = 0; + rupts->hwExternals = 0; + rupts->hwAlignments = 0; + rupts->hwPrograms = 0; + rupts->hwFloatPointUnavailable = 0; + rupts->hwDecrementers = 0; + rupts->hwIOErrors = 0; + rupts->hwSystemCalls = 0; + rupts->hwTraces = 0; + rupts->hwFloatingPointAssists = 0; + rupts->hwPerformanceMonitors = 0; + rupts->hwAltivecs = 0; + rupts->hwInstBreakpoints = 0; + rupts->hwSystemManagements = 0; + rupts->hwAltivecAssists = 0; + rupts->hwThermal = 0; + rupts->hwSoftPatches = 0; + rupts->hwMaintenances = 0; + rupts->hwInstrumentations = 0; + + ml_set_interrupts_enabled(oldlevel); + return KERN_SUCCESS; + } else { + return KERN_FAILURE; + } +} + +__private_extern__ kern_return_t +chudxnu_clear_cpu_rupt_counters(int cpu) +{ + if(cpu < 0 || (unsigned int)cpu >= real_ncpus) { // sanity check + return KERN_FAILURE; + } + +/* + * XXX + * bzero((char *)&(cpu_data_ptr[cpu]->hwCtrs), sizeof(struct hwCtrs)); + */ + return KERN_SUCCESS; +} diff --git a/osfmk/chud/i386/chud_osfmk_callback_i386.c b/osfmk/chud/i386/chud_osfmk_callback_i386.c new file mode 100644 index 000000000..9800764df --- /dev/null +++ b/osfmk/chud/i386/chud_osfmk_callback_i386.c @@ -0,0 +1,590 @@ +/* + * Copyright (c) 2003-2004 Apple Computer, Inc. All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * The contents of this file constitute Original Code as defined in and + * are subject to the Apple Public Source License Version 1.1 (the + * "License"). You may not use this file except in compliance with the + * License. Please obtain a copy of the License at + * http://www.apple.com/publicsource and read it before using this file. 
+ * + * This Original Code and all software distributed under the License are + * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the + * License for the specific language governing rights and limitations + * under the License. + * + * @APPLE_LICENSE_HEADER_END@ + */ + +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include +#include + +#include +#include +#include + +#include +#define CHUD_TIMER_CALLBACK_CANCEL 0 +#define CHUD_TIMER_CALLBACK_ENTER 1 +#define CHUD_TIMER_CALLBACK 2 +#define CHUD_AST_SEND 3 +#define CHUD_AST_CALLBACK 4 +#define CHUD_CPUSIG_SEND 5 +#define CHUD_CPUSIG_CALLBACK 6 + +__private_extern__ +void chudxnu_cancel_all_callbacks(void) +{ + chudxnu_cpusig_callback_cancel(); + chudxnu_cpu_timer_callback_cancel_all(); + chudxnu_interrupt_callback_cancel(); + chudxnu_perfmon_ast_callback_cancel(); + chudxnu_kdebug_callback_cancel(); + chudxnu_thread_timer_callback_cancel(); + chudxnu_trap_callback_cancel(); +#if XXX + chudxnu_syscall_callback_cancel(); +#endif +} + +static chudcpu_data_t chudcpu_boot_cpu; +void * +chudxnu_cpu_alloc(boolean_t boot_processor) +{ + chudcpu_data_t *chud_proc_info; + + + if (boot_processor) { + chud_proc_info = &chudcpu_boot_cpu; + } else { + chud_proc_info = (chudcpu_data_t *) + kalloc(sizeof(chudcpu_data_t)); + if (chud_proc_info == (chudcpu_data_t *)NULL) { + return (void *)NULL; + } + } + bzero((char *)chud_proc_info, sizeof(chudcpu_data_t)); + chud_proc_info->t_deadline = 0xFFFFFFFFFFFFFFFFULL; + mpqueue_init(&chud_proc_info->cpu_request_queue); + + + return (void *)chud_proc_info; +} + +void +chudxnu_cpu_free(void *cp) +{ + if (cp == NULL || cp == (void *)&chudcpu_boot_cpu) { + return; + } else { + 
kfree(cp,sizeof(chudcpu_data_t)); + } +} + +static void +chudxnu_private_cpu_timer_callback( + timer_call_param_t param0, + timer_call_param_t param1) +{ +#pragma unused (param0) +#pragma unused (param1) + chudcpu_data_t *chud_proc_info; + boolean_t oldlevel; + x86_thread_state_t state; + mach_msg_type_number_t count; + chudxnu_cpu_timer_callback_func_t fn; + + oldlevel = ml_set_interrupts_enabled(FALSE); + chud_proc_info = (chudcpu_data_t *)(current_cpu_datap()->cpu_chud); + + count = x86_THREAD_STATE_COUNT; + if (chudxnu_thread_get_state(current_thread(), + x86_THREAD_STATE, + (thread_state_t)&state, + &count, + FALSE) == KERN_SUCCESS) { + fn = chud_proc_info->cpu_timer_callback_fn; + if (fn) { + KERNEL_DEBUG_CONSTANT( + MACHDBG_CODE(DBG_MACH_CHUD, + CHUD_TIMER_CALLBACK) | DBG_FUNC_NONE, + (uint32_t)fn, 0,0,0,0); + //state.eip, state.cs, 0, 0); + (fn)( + x86_THREAD_STATE, + (thread_state_t)&state, + count); + } + } + + ml_set_interrupts_enabled(oldlevel); +} + +__private_extern__ kern_return_t +chudxnu_cpu_timer_callback_enter( + chudxnu_cpu_timer_callback_func_t func, + uint32_t time, + uint32_t units) +{ + chudcpu_data_t *chud_proc_info; + boolean_t oldlevel; + + oldlevel = ml_set_interrupts_enabled(FALSE); + chud_proc_info = (chudcpu_data_t *)(current_cpu_datap()->cpu_chud); + + // cancel any existing callback for this cpu + timer_call_cancel(&(chud_proc_info->cpu_timer_call)); + + chud_proc_info->cpu_timer_callback_fn = func; + + clock_interval_to_deadline(time, units, &(chud_proc_info->t_deadline)); + timer_call_setup(&(chud_proc_info->cpu_timer_call), + chudxnu_private_cpu_timer_callback, NULL); + timer_call_enter(&(chud_proc_info->cpu_timer_call), + chud_proc_info->t_deadline); + + KERNEL_DEBUG_CONSTANT( + MACHDBG_CODE(DBG_MACH_CHUD, + CHUD_TIMER_CALLBACK_ENTER) | DBG_FUNC_NONE, + (uint32_t) func, time, units, 0, 0); + + ml_set_interrupts_enabled(oldlevel); + return KERN_SUCCESS; +} + +__private_extern__ kern_return_t 
+chudxnu_cpu_timer_callback_cancel(void) +{ + chudcpu_data_t *chud_proc_info; + boolean_t oldlevel; + + oldlevel = ml_set_interrupts_enabled(FALSE); + chud_proc_info = (chudcpu_data_t *)(current_cpu_datap()->cpu_chud); + + timer_call_cancel(&(chud_proc_info->cpu_timer_call)); + + KERNEL_DEBUG_CONSTANT( + MACHDBG_CODE(DBG_MACH_CHUD, + CHUD_TIMER_CALLBACK_CANCEL) | DBG_FUNC_NONE, + 0, 0, 0, 0, 0); + + // set to max value: + chud_proc_info->t_deadline |= ~(chud_proc_info->t_deadline); + chud_proc_info->cpu_timer_callback_fn = NULL; + + ml_set_interrupts_enabled(oldlevel); + return KERN_SUCCESS; +} + +/* + * Cancel the CHUD timer callback on every cpu slot below real_ncpus: + * cancels the timer call, sets t_deadline to all ones and clears the + * callback pointer. Slots with no cpu data or no CHUD data are skipped. + * Always returns KERN_SUCCESS. + */ +__private_extern__ kern_return_t +chudxnu_cpu_timer_callback_cancel_all(void) +{ + unsigned int cpu; + chudcpu_data_t *chud_proc_info; + + for(cpu=0; cpu < real_ncpus; cpu++) { + /* a cpu_data_ptr[] slot may be NULL (chudxnu_enable_cpu checks for this too) */ + chud_proc_info = (cpu_data_ptr[cpu] != NULL) ? (chudcpu_data_t *) cpu_data_ptr[cpu]->cpu_chud : NULL; + if (chud_proc_info == NULL) + continue; + timer_call_cancel(&(chud_proc_info->cpu_timer_call)); + chud_proc_info->t_deadline |= ~(chud_proc_info->t_deadline); + chud_proc_info->cpu_timer_callback_fn = NULL; + } + return KERN_SUCCESS; +} + +#pragma mark **** trap **** +static chudxnu_trap_callback_func_t trap_callback_fn = NULL; + +static kern_return_t +chudxnu_private_trap_callback( + int trapno, + void *regs, + int unused1, + int unused2) +{ +#pragma unused (regs) +#pragma unused (unused1) +#pragma unused (unused2) + kern_return_t retval = KERN_FAILURE; + chudxnu_trap_callback_func_t fn = trap_callback_fn; + + if(fn) { + boolean_t oldlevel; + x86_thread_state_t state; // once we have an 64bit- independent way to determine if a thread is + // running kernel code, we'll switch to x86_thread_state_t. 
+ mach_msg_type_number_t count; + + oldlevel = ml_set_interrupts_enabled(FALSE); + + count = x86_THREAD_STATE_COUNT; + if(chudxnu_thread_get_state(current_thread(), + x86_THREAD_STATE, + (thread_state_t)&state, + &count, + FALSE) == KERN_SUCCESS) { + + retval = (fn)( + trapno, + x86_THREAD_STATE, + (thread_state_t)&state, + count); + } + ml_set_interrupts_enabled(oldlevel); + } + + return retval; +} + +__private_extern__ kern_return_t +chudxnu_trap_callback_enter(chudxnu_trap_callback_func_t func) +{ + trap_callback_fn = func; + perfTrapHook = chudxnu_private_trap_callback; + return KERN_SUCCESS; +} + +__private_extern__ kern_return_t +chudxnu_trap_callback_cancel(void) +{ + trap_callback_fn = NULL; + perfTrapHook = NULL; + return KERN_SUCCESS; +} + +#pragma mark **** ast **** +static +chudxnu_perfmon_ast_callback_func_t perfmon_ast_callback_fn = NULL; + +static kern_return_t +chudxnu_private_chud_ast_callback( + int trapno, + void *regs, + int unused1, + int unused2) +{ +#pragma unused (trapno) +#pragma unused (regs) +#pragma unused (unused1) +#pragma unused (unused2) + boolean_t oldlevel = ml_set_interrupts_enabled(FALSE); + ast_t *myast = ast_pending(); + kern_return_t retval = KERN_FAILURE; + chudxnu_perfmon_ast_callback_func_t fn = perfmon_ast_callback_fn; + + if (*myast & AST_CHUD_URGENT) { + *myast &= ~(AST_CHUD_URGENT | AST_CHUD); + if ((*myast & AST_PREEMPTION) != AST_PREEMPTION) + *myast &= ~(AST_URGENT); + retval = KERN_SUCCESS; + } else if (*myast & AST_CHUD) { + *myast &= ~(AST_CHUD); + retval = KERN_SUCCESS; + } + + if (fn) { + x86_thread_state_t state; + mach_msg_type_number_t count; + count = x86_THREAD_STATE_COUNT; + + if (chudxnu_thread_get_state( + current_thread(), + x86_THREAD_STATE, + (thread_state_t) &state, &count, + TRUE) == KERN_SUCCESS) { + + KERNEL_DEBUG_CONSTANT( + MACHDBG_CODE(DBG_MACH_CHUD, + CHUD_AST_CALLBACK) | DBG_FUNC_NONE, + (uint32_t) fn, 0, 0, 0, 0); + + (fn)( + x86_THREAD_STATE, + (thread_state_t) &state, + count); + } + } + + 
ml_set_interrupts_enabled(oldlevel); + return retval; +} + +__private_extern__ kern_return_t +chudxnu_perfmon_ast_callback_enter(chudxnu_perfmon_ast_callback_func_t func) +{ + perfmon_ast_callback_fn = func; + perfASTHook = chudxnu_private_chud_ast_callback; + return KERN_SUCCESS; +} + +__private_extern__ kern_return_t +chudxnu_perfmon_ast_callback_cancel(void) +{ + perfmon_ast_callback_fn = NULL; + perfASTHook = NULL; + return KERN_SUCCESS; +} + +__private_extern__ kern_return_t +chudxnu_perfmon_ast_send_urgent(boolean_t urgent) +{ + boolean_t oldlevel = ml_set_interrupts_enabled(FALSE); + ast_t *myast = ast_pending(); + + if(urgent) { + *myast |= (AST_CHUD_URGENT | AST_URGENT); + } else { + *myast |= (AST_CHUD); + } + + KERNEL_DEBUG_CONSTANT( + MACHDBG_CODE(DBG_MACH_CHUD, CHUD_AST_SEND) | DBG_FUNC_NONE, + urgent, 0, 0, 0, 0); + + ml_set_interrupts_enabled(oldlevel); + return KERN_SUCCESS; +} + +__private_extern__ kern_return_t +chudxnu_perfmon_ast_send(void) +{ + return chudxnu_perfmon_ast_send_urgent(TRUE); +} + +#pragma mark **** interrupt **** +static chudxnu_interrupt_callback_func_t interrupt_callback_fn = NULL; + +static void +chudxnu_private_interrupt_callback(void *foo) +{ +#pragma unused (foo) + chudxnu_interrupt_callback_func_t fn = interrupt_callback_fn; + + if(fn) { + boolean_t oldlevel; + x86_thread_state_t state; + mach_msg_type_number_t count; + + oldlevel = ml_set_interrupts_enabled(FALSE); + + count = x86_THREAD_STATE_COUNT; + if(chudxnu_thread_get_state(current_thread(), + x86_THREAD_STATE, + (thread_state_t)&state, + &count, + FALSE) == KERN_SUCCESS) { + (fn)( + X86_INTERRUPT_PERFMON, + x86_THREAD_STATE, + (thread_state_t)&state, + count); + } + ml_set_interrupts_enabled(oldlevel); + } +} + +__private_extern__ kern_return_t +chudxnu_interrupt_callback_enter(chudxnu_interrupt_callback_func_t func) +{ + interrupt_callback_fn = func; + lapic_set_pmi_func((i386_intr_func_t)chudxnu_private_interrupt_callback); + return KERN_SUCCESS; +} + 
+__private_extern__ kern_return_t +chudxnu_interrupt_callback_cancel(void) +{ + interrupt_callback_fn = NULL; + lapic_set_pmi_func(NULL); + return KERN_SUCCESS; +} + +#pragma mark **** cpu signal **** +static chudxnu_cpusig_callback_func_t cpusig_callback_fn = NULL; + +static kern_return_t +chudxnu_private_cpu_signal_handler(int request) +{ + chudxnu_cpusig_callback_func_t fn = cpusig_callback_fn; + + if (fn) { + x86_thread_state_t state; + mach_msg_type_number_t count = x86_THREAD_STATE_COUNT; + + if (chudxnu_thread_get_state(current_thread(), + x86_THREAD_STATE, + (thread_state_t) &state, &count, + FALSE) == KERN_SUCCESS) { + KERNEL_DEBUG_CONSTANT( + MACHDBG_CODE(DBG_MACH_CHUD, + CHUD_CPUSIG_CALLBACK) | DBG_FUNC_NONE, + (uint32_t)fn, request, 0, 0, 0); + return (fn)( + request, x86_THREAD_STATE, + (thread_state_t) &state, count); + } else { + return KERN_FAILURE; + } + } + return KERN_SUCCESS; //ignored +} +/* + * chudxnu_cpu_signal_handler() is called from the IPI handler + * when a CHUD signal arrives from another processor. 
+ */ +__private_extern__ void +chudxnu_cpu_signal_handler(void) +{ + chudcpu_signal_request_t *reqp; + chudcpu_data_t *chudinfop; + + chudinfop = (chudcpu_data_t *) current_cpu_datap()->cpu_chud; + + mpdequeue_head(&(chudinfop->cpu_request_queue), + (queue_entry_t *) &reqp); + while (reqp != NULL) { + chudxnu_private_cpu_signal_handler(reqp->req_code); + reqp->req_sync = 0; + mpdequeue_head(&(chudinfop->cpu_request_queue), + (queue_entry_t *) &reqp); + } +} + +__private_extern__ kern_return_t +chudxnu_cpusig_callback_enter(chudxnu_cpusig_callback_func_t func) +{ + cpusig_callback_fn = func; + return KERN_SUCCESS; +} + +__private_extern__ kern_return_t +chudxnu_cpusig_callback_cancel(void) +{ + cpusig_callback_fn = NULL; + return KERN_SUCCESS; +} + +/* + * Queue request_code on otherCPU's CHUD request queue, signal that cpu and + * spin until it clears req_sync (panics after LockTimeOut). + * Returns KERN_INVALID_ARGUMENT when otherCPU is out of range, is the + * calling cpu, is not running, or has no cpu/CHUD data to queue on. + */ +__private_extern__ kern_return_t +chudxnu_cpusig_send(int otherCPU, uint32_t request_code) +{ + int thisCPU; + kern_return_t retval = KERN_FAILURE; + chudcpu_signal_request_t request; + uint64_t deadline; + chudcpu_data_t *target_chudp; + boolean_t old_level; + + disable_preemption(); + // force interrupts on for a cross CPU signal. + old_level = chudxnu_set_interrupts_enabled(TRUE); + thisCPU = cpu_number(); + + /* never dereference an empty cpu slot or enqueue on a missing CHUD area */ + if ((unsigned) otherCPU < real_ncpus && + thisCPU != otherCPU && + cpu_data_ptr[otherCPU] != NULL && + cpu_data_ptr[otherCPU]->cpu_chud != NULL && + cpu_data_ptr[otherCPU]->cpu_running) { + + target_chudp = (chudcpu_data_t *) + cpu_data_ptr[otherCPU]->cpu_chud; + + /* Fill out request */ + request.req_sync = 0xFFFFFFFF; /* set sync flag */ + //request.req_type = CPRQchud; /* set request type */ + request.req_code = request_code; /* set request */ + + KERNEL_DEBUG_CONSTANT( + MACHDBG_CODE(DBG_MACH_CHUD, + CHUD_CPUSIG_SEND) | DBG_FUNC_NONE, + otherCPU, request_code, 0, 0, 0); + + /* + * Insert the new request in the target cpu's request queue + * and signal target cpu. 
+ */ + mpenqueue_tail(&target_chudp->cpu_request_queue, + &request.req_entry); + i386_signal_cpu(otherCPU, MP_CHUD, ASYNC); + + /* Wait for response or timeout */ + deadline = mach_absolute_time() + LockTimeOut; + while (request.req_sync != 0) { + if (mach_absolute_time() > deadline) { + panic("chudxnu_cpusig_send(%d,%d) timed out\n", + otherCPU, request_code); + } + cpu_pause(); + } + retval = KERN_SUCCESS; + } else { + retval = KERN_INVALID_ARGUMENT; + } + + chudxnu_set_interrupts_enabled(old_level); + enable_preemption(); + return retval; +} + +#ifdef XXX +#pragma mark **** CHUD syscall (PPC) **** + +typedef int (*PPCcallEnt)(struct savearea *save); +extern PPCcallEnt PPCcalls[]; + +static chudxnu_syscall_callback_func_t syscall_callback_fn = NULL; + +static int +chudxnu_private_syscall_callback(struct savearea *ssp) +{ + if(ssp) { + if(syscall_callback_fn) { + struct ppc_thread_state64 state; + kern_return_t retval; + mach_msg_type_number_t count = PPC_THREAD_STATE64_COUNT; + chudxnu_copy_savearea_to_threadstate(PPC_THREAD_STATE64, (thread_state_t)&state, &count, ssp); + ssp->save_r3 = (syscall_callback_fn)(PPC_THREAD_STATE64, (thread_state_t)&state, count); + } else { + ssp->save_r3 = KERN_FAILURE; + } + } + + return 1; // check for ASTs (always) +} + +__private_extern__ kern_return_t +chudxnu_syscall_callback_enter(chudxnu_syscall_callback_func_t func) +{ + syscall_callback_fn = func; + PPCcalls[9] = chudxnu_private_syscall_callback; + __asm__ volatile("eieio"); /* force order */ + __asm__ volatile("sync"); /* force to memory */ + return KERN_SUCCESS; +} + +__private_extern__ kern_return_t +chudxnu_syscall_callback_cancel(void) +{ + syscall_callback_fn = NULL; + PPCcalls[9] = NULL; + __asm__ volatile("eieio"); /* force order */ + __asm__ volatile("sync"); /* force to memory */ + return KERN_SUCCESS; +} +#endif diff --git a/osfmk/chud/i386/chud_thread_i386.c b/osfmk/chud/i386/chud_thread_i386.c new file mode 100644 index 000000000..ef90c2379 --- /dev/null +++ 
b/osfmk/chud/i386/chud_thread_i386.c @@ -0,0 +1,651 @@ +/* + * Copyright (c) 2003-2004 Apple Computer, Inc. All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * The contents of this file constitute Original Code as defined in and + * are subject to the Apple Public Source License Version 1.1 (the + * "License"). You may not use this file except in compliance with the + * License. Please obtain a copy of the License at + * http://www.apple.com/publicsource and read it before using this file. + * + * This Original Code and all software distributed under the License are + * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the + * License for the specific language governing rights and limitations + * under the License. + * + * @APPLE_LICENSE_HEADER_END@ + */ + +#include +#include +#include + +#include +#include +#include + +#include +#include + +#include +#include + +#include +#include +#include + +#pragma mark **** thread state **** + +__private_extern__ kern_return_t +chudxnu_thread_user_state_available(thread_t thread) +{ +#pragma unused (thread) + return KERN_SUCCESS; +} + +__private_extern__ kern_return_t +chudxnu_thread_get_state( + thread_t thread, + thread_flavor_t flavor, + thread_state_t tstate, + mach_msg_type_number_t *count, + boolean_t user_only) +{ + if (user_only) { + /* We can't get user state for kernel threads */ + if (thread->task == kernel_task) + return KERN_FAILURE; + /* this properly handles deciding whether or not the thread is 64 bit or not */ + return machine_thread_get_state(thread, flavor, tstate, count); + } else { + // i386 machine_thread_get_kern_state() is different from the PPC version which returns + // the previous save area - user or kernel - rather than kernel or NULL if no kernel + // 
interrupt state available + + // the real purpose of this branch is the following: + // the user doesn't care if the thread states are user or kernel, he + // just wants the thread state, so we need to determine the proper one + // to return, kernel or user, for the given thread. + if(thread == current_thread() && current_cpu_datap()->cpu_int_state) { + // the above are conditions where we possibly can read the kernel + // state. we still need to determine if this interrupt happened in + // kernel or user context + if(USER_STATE(thread) == current_cpu_datap()->cpu_int_state && + current_cpu_datap()->cpu_interrupt_level == 1) { + // interrupt happened in user land + return machine_thread_get_state(thread, flavor, tstate, count); + } else { + // kernel interrupt. + return machine_thread_get_kern_state(thread, flavor, tstate, count); + } + } else { + // get the user-mode thread state + return machine_thread_get_state(thread, flavor, tstate, count); + } + } +} + +__private_extern__ kern_return_t +chudxnu_thread_set_state( + thread_t thread, + thread_flavor_t flavor, + thread_state_t tstate, + mach_msg_type_number_t count, + boolean_t user_only) +{ +#pragma unused (user_only) + return machine_thread_set_state(thread, flavor, tstate, count); +} + +#pragma mark **** task memory read/write **** + +__private_extern__ kern_return_t +chudxnu_task_read( + task_t task, + void *kernaddr, + uint64_t usraddr, + vm_size_t size) +{ + kern_return_t ret = KERN_SUCCESS; + + if(current_task()==task) { + if(ml_at_interrupt_context()) { + return KERN_FAILURE; // can't do copyin on interrupt stack + } + + if(copyin(usraddr, kernaddr, size)) { + ret = KERN_FAILURE; + } + } else { + vm_map_t map = get_task_map(task); + ret = vm_map_read_user(map, usraddr, kernaddr, size); + } + + return ret; +} + +__private_extern__ kern_return_t +chudxnu_task_write( + task_t task, + uint64_t useraddr, + void *kernaddr, + vm_size_t size) +{ + kern_return_t ret = KERN_SUCCESS; + + if(current_task()==task) { + 
if(ml_at_interrupt_context()) { + return KERN_FAILURE; // can't do copyout on interrupt stack + } + + if(copyout(kernaddr, useraddr, size)) { + ret = KERN_FAILURE; + } + } else { + vm_map_t map = get_task_map(task); + ret = vm_map_write_user(map, kernaddr, useraddr, size); + } + + return ret; +} + +/* + * Copy size bytes from kernel virtual address srcaddr into dstaddr. + * Each address is translated with pmap_find_phys(kernel_pmap, ...) and read + * through the physical-access helpers in 1, 2 or 4 byte units. + * Returns KERN_FAILURE if a page is not mapped or the physical address is + * not below mem_actual, KERN_SUCCESS otherwise. + */ +__private_extern__ kern_return_t +chudxnu_kern_read(void *dstaddr, vm_offset_t srcaddr, vm_size_t size) +{ + while(size>0) { + ppnum_t pp; + addr64_t phys_addr; + + /* Get the page number */ + pp = pmap_find_phys(kernel_pmap, srcaddr); + if(!pp) { + return KERN_FAILURE; /* Not mapped... */ + } + + /* Shove in the page offset */ + phys_addr = ((addr64_t)pp << 12) | + (srcaddr & 0x0000000000000FFFULL); + if(phys_addr >= mem_actual) { + return KERN_FAILURE; /* out of range */ + } + + if((phys_addr&0x1) || size==1) { + *((uint8_t *)dstaddr) = + ml_phys_read_byte_64(phys_addr); + dstaddr = ((uint8_t *)dstaddr) + 1; + srcaddr += sizeof(uint8_t); + size -= sizeof(uint8_t); + } else if((phys_addr&0x3) || size<4) { + /* fewer than 4 bytes left: a word access would overrun dstaddr and wrap size */ + *((uint16_t *)dstaddr) = + ml_phys_read_half_64(phys_addr); + dstaddr = ((uint16_t *)dstaddr) + 1; + srcaddr += sizeof(uint16_t); + size -= sizeof(uint16_t); + } else { + *((uint32_t *)dstaddr) = + ml_phys_read_word_64(phys_addr); + dstaddr = ((uint32_t *)dstaddr) + 1; + srcaddr += sizeof(uint32_t); + size -= sizeof(uint32_t); + } + } + return KERN_SUCCESS; +} + +/* + * Copy size bytes from srcaddr to kernel virtual address dstaddr, using the + * same per-access translation and 1, 2 or 4 byte stepping as + * chudxnu_kern_read. Returns KERN_FAILURE if a page is not mapped or the + * physical address is not below mem_actual, KERN_SUCCESS otherwise. + */ +__private_extern__ kern_return_t +chudxnu_kern_write( + vm_offset_t dstaddr, + void *srcaddr, + vm_size_t size) +{ + while(size>0) { + ppnum_t pp; + addr64_t phys_addr; + + /* Get the page number */ + pp = pmap_find_phys(kernel_pmap, dstaddr); + if(!pp) { + return KERN_FAILURE; /* Not mapped... 
*/ + } + + /* Shove in the page offset */ + phys_addr = ((addr64_t)pp << 12) | + (dstaddr & 0x0000000000000FFFULL); + if(phys_addr >= mem_actual) { + return KERN_FAILURE; /* out of range (same bound as chudxnu_kern_read) */ + } + + if((phys_addr&0x1) || size==1) { + ml_phys_write_byte_64(phys_addr, *((uint8_t *)srcaddr)); + srcaddr = ((uint8_t *)srcaddr) + 1; + dstaddr += sizeof(uint8_t); + size -= sizeof(uint8_t); + } else if((phys_addr&0x3) || size<4) { + /* fewer than 4 bytes left: a word access would overrun srcaddr and wrap size */ + ml_phys_write_half_64(phys_addr, *((uint16_t *)srcaddr)); + srcaddr = ((uint16_t *)srcaddr) + 1; + dstaddr += sizeof(uint16_t); + size -= sizeof(uint16_t); + } else { + ml_phys_write_word_64(phys_addr, *((uint32_t *)srcaddr)); + srcaddr = ((uint32_t *)srcaddr) + 1; + dstaddr += sizeof(uint32_t); + size -= sizeof(uint32_t); + } + } + + return KERN_SUCCESS; +} + +#define VALID_STACK_ADDRESS(supervisor, addr, minKernAddr, maxKernAddr) (supervisor ? (addr>=minKernAddr && addr<=maxKernAddr) : TRUE) +// don't try to read in the hole +#define VALID_STACK_ADDRESS64(supervisor, addr, minKernAddr, maxKernAddr) \ + (supervisor ? 
(addr >= minKernAddr && addr <= maxKernAddr) : \ + (addr != 0 && (addr <= 0x00007FFFFFFFFFFFULL || addr >= 0xFFFF800000000000ULL))) + +typedef struct _cframe64_t { + uint64_t prevFP; // can't use a real pointer here until we're a 64 bit kernel + uint64_t caller; + uint64_t args[0]; +}cframe64_t; + + +typedef struct _cframe_t { + struct _cframe_t *prev; // when we go 64 bits, this needs to be capped at 32 bits + uint32_t caller; + uint32_t args[0]; +} cframe_t; + +__private_extern__ +kern_return_t chudxnu_thread_get_callstack64( + thread_t thread, + uint64_t *callstack, + mach_msg_type_number_t *count, + boolean_t user_only) +{ + kern_return_t kr = KERN_FAILURE; + kern_return_t ret = KERN_SUCCESS; + task_t task = thread->task; + uint64_t currPC = 0; + uint64_t prevPC = 0; + uint64_t currFP = 0; + uint64_t prevFP = 0; + uint64_t rsp = 0; + uint64_t kernStackMin = min_valid_stack_address(); + uint64_t kernStackMax = max_valid_stack_address(); + uint64_t *buffer = callstack; + int bufferIndex = 0; + int bufferMaxIndex = *count; + boolean_t supervisor = FALSE; + boolean_t is64bit = FALSE; + void * t_regs; + + if (user_only) { + /* We can't get user state for kernel threads */ + if (task == kernel_task) { + return KERN_FAILURE; + } + t_regs = USER_STATE(thread); + + if(is_saved_state64(t_regs)) { + void *int_state = current_cpu_datap()->cpu_int_state; + x86_saved_state64_t *s64 = saved_state64(t_regs); + + if(int_state) { // are we on an interrupt that happened in user land + supervisor = !(t_regs == int_state && current_cpu_datap()->cpu_interrupt_level == 1); + } else { + if(s64) { + supervisor = ((s64->isf.cs & SEL_PL) != SEL_PL_U); + } else { + // assume 32 bit kernel + supervisor = FALSE; + } + } + is64bit = TRUE; + } else { + x86_saved_state32_t *regs; + + regs = saved_state32(t_regs); + + // find out if we're in supervisor mode + supervisor = ((regs->cs & SEL_PL) != SEL_PL_U); + is64bit = FALSE; + } + } else { + t_regs = current_cpu_datap()->cpu_int_state; + 
x86_saved_state32_t *regs; + + regs = saved_state32(t_regs); + + // find out if we're in supervisor mode + supervisor = ((regs->cs & SEL_PL) != SEL_PL_U); + is64bit = FALSE; + } + + if(is64bit) { + x86_saved_state64_t *regs = saved_state64(t_regs); + + if(user_only) { + /* cant get user state for kernel threads */ + if(task == kernel_task) { + return KERN_FAILURE; + } + regs = USER_REGS64(thread); + } + + currPC = regs->isf.rip; + currFP = regs->rbp; + + if(!currPC) + { + *count = 0; + return KERN_FAILURE; + } + + bufferIndex = 0; + + //allot space for saving %rsp on the + //bottom of the stack for user callstacks + if(!supervisor) + bufferMaxIndex = bufferMaxIndex - 1; + + if(bufferMaxIndex < 1) { + *count = 0; + return KERN_RESOURCE_SHORTAGE; + } + buffer[bufferIndex++] = currPC; // save RIP on the top of the stack + + // now make a 64bit back trace + while (VALID_STACK_ADDRESS64(supervisor, currFP, kernStackMin, kernStackMax)) + { + // this is the address where caller lives in the user thread + uint64_t caller = currFP + sizeof(uint64_t); + if(!currFP) { + currPC = 0; + break; + } + + if(bufferIndex >= bufferMaxIndex) { + *count = bufferMaxIndex; + return KERN_RESOURCE_SHORTAGE; + } + + /* read our caller */ + kr = chudxnu_task_read(task, &currPC, caller, sizeof(uint64_t)); + + if(kr != KERN_SUCCESS) { + currPC = 0; + break; + } + + /* + * retrive contents of the frame pointer and advance to the next stack + * frame if it's valid + */ + prevFP = 0; + kr = chudxnu_task_read(task, &prevFP, currFP, sizeof(uint64_t)); + + if(kr != KERN_SUCCESS) { + currPC = 0; + break; + } + + if(VALID_STACK_ADDRESS64(supervisor, prevFP, kernStackMin, kernStackMax)) { + buffer[bufferIndex++] = currPC; + prevPC = currPC; + } + if(prevFP < currFP) { + break; + } else { + currFP = prevFP; + } + } + + // append (rsp) on the bottom of the callstack + kr = chudxnu_task_read(task, &rsp, (addr64_t) regs->isf.rsp, sizeof(uint64_t)); + if(kr == KERN_SUCCESS) { + buffer[bufferIndex++] = rsp; + 
} + } else { + /* !thread_is_64bit() */ + /* we grab 32 bit frames and silently promote them to 64 bits */ + uint32_t tmpWord = 0; + x86_saved_state32_t *regs = NULL; + + if(user_only) { + /* cant get user state for kernel threads */ + if(task == kernel_task || supervisor) { + return 0x11; + } + regs = USER_REGS32(thread); + } else { + regs = saved_state32(current_cpu_datap()->cpu_int_state); + } + + if(regs == NULL) { + *count = 0; + return 0x12; + } + + currPC = (uint64_t) regs->eip; + currFP = (uint64_t) regs->ebp; + + bufferIndex = 0; + //if(!supervisor) + // bufferMaxIndex = bufferMaxIndex - 1; //allot space for saving %rsp on the stack for user callstacks + if(bufferMaxIndex < 1) { + *count = 0; + return KERN_RESOURCE_SHORTAGE; + } + buffer[bufferIndex++] = currPC; // save EIP on the top of the stack + + // now make a 64bit back trace from 32 bit stack frames + while (VALID_STACK_ADDRESS(supervisor, currFP, kernStackMin, kernStackMax)) + { + cframe_t *fp = (cframe_t *) (uint32_t) currFP; + + if(bufferIndex >= bufferMaxIndex) { + *count = bufferMaxIndex; + return KERN_RESOURCE_SHORTAGE; + } + + /* read the next frame */ + if(supervisor) { + kr = chudxnu_kern_read(&tmpWord, (vm_offset_t) &fp->caller, sizeof(uint32_t)); + } else { + kr = chudxnu_task_read(task, &tmpWord, (vm_offset_t) &fp->caller, sizeof(uint32_t)); + } + + if(kr != KERN_SUCCESS) { + currPC = 0; + break; + } + + currPC = (uint64_t) tmpWord; // promote 32 bit address + + /* + * retrive contents of the frame pointer and advance to the next stack + * frame if it's valid + */ + prevFP = 0; + if(supervisor) { + kr = chudxnu_kern_read(&tmpWord, (vm_offset_t)&fp->prev, sizeof(uint32_t)); + } else { + kr = chudxnu_task_read(task, &tmpWord, (vm_offset_t)&fp->prev, sizeof(uint32_t)); + } + prevFP = (uint64_t) tmpWord; // promote 32 bit address + + if(prevFP) { + buffer[bufferIndex++] = currPC; + prevPC = currPC; + } + if(prevFP < currFP) { + break; + } else { + currFP = prevFP; + } + } + + // append (esp) 
on the bottom of the callstack + if(!supervisor) { + kr = chudxnu_task_read(task, &tmpWord, regs->uesp, sizeof(uint32_t)); + if(kr == KERN_SUCCESS) { + rsp = (uint64_t) tmpWord; // promote 32 bit address + buffer[bufferIndex++] = rsp; + } + } + } + + *count = bufferIndex; + return ret; +} + +__private_extern__ kern_return_t +chudxnu_thread_get_callstack( + thread_t thread, + uint32_t *callStack, + mach_msg_type_number_t *count, + boolean_t user_only) +{ + kern_return_t kr; + task_t task = thread->task; + uint32_t currPC; + uint32_t currFP; + uint32_t prevFP = 0; + uint32_t prevPC = 0; + uint32_t esp = 0; + uint32_t kernStackMin = min_valid_stack_address(); + uint32_t kernStackMax = max_valid_stack_address(); + uint32_t *buffer = callStack; + int bufferIndex = 0; + int bufferMaxIndex = *count; + boolean_t supervisor; + x86_saved_state32_t *regs = NULL; + + if (user_only) { + /* We can't get user state for kernel threads */ + if (task == kernel_task) { + return KERN_FAILURE; + } + regs = USER_REGS32(thread); + } else { + regs = saved_state32(current_cpu_datap()->cpu_int_state); + } + + if (regs == NULL) { + *count = 0; + return KERN_FAILURE; + } + + supervisor = ((regs->cs & SEL_PL) != SEL_PL_U); + + currPC = regs->eip; + currFP = regs->ebp; + + bufferIndex = 0; + if(!supervisor) + bufferMaxIndex -= 1; // allot space for saving userland %esp on stack + if (bufferMaxIndex < 1) { + *count = 0; + return KERN_RESOURCE_SHORTAGE; + } + buffer[bufferIndex++] = currPC; //save PC in position 0. + + // Now, fill buffer with stack backtraces. 
+ while (VALID_STACK_ADDRESS(supervisor, currFP, kernStackMin, kernStackMax)) { + cframe_t *fp = (cframe_t *) currFP; + + if (bufferIndex >= bufferMaxIndex) { + *count = bufferMaxIndex; + return KERN_RESOURCE_SHORTAGE; + } + + if (supervisor) { + kr = chudxnu_kern_read( + &currPC, + (vm_offset_t) &fp->caller, + sizeof(currPC)); + } else { + kr = chudxnu_task_read( + task, + &currPC, + (vm_offset_t) &fp->caller, + sizeof(currPC)); + } + if (kr != KERN_SUCCESS) + break; + + //retrieve the contents of the frame pointer + // and advance to the prev stack frame if it's valid + prevFP = 0; + if (supervisor) { + kr = chudxnu_kern_read( + &prevFP, + (vm_offset_t) &fp->prev, + sizeof(prevFP)); + } else { + kr = chudxnu_task_read( + task, + &prevFP, + (vm_offset_t) &fp->prev, + sizeof(prevFP)); + } + if (prevFP) { + buffer[bufferIndex++] = currPC; + prevPC = currPC; + } + if (prevFP < currFP) { + break; + } else { + currFP = prevFP; + } + } + + // put the stack pointer on the bottom of the backtrace + if(!supervisor) { + kr = chudxnu_task_read(task, &esp, regs->uesp, sizeof(uint32_t)); + if(kr == KERN_SUCCESS) { + buffer[bufferIndex++] = esp; + } + } + + *count = bufferIndex; + return KERN_SUCCESS; +} + + +#pragma mark **** DEPRECATED **** + +// DEPRECATED +__private_extern__ +kern_return_t chudxnu_bind_current_thread(int cpu) +{ + return chudxnu_bind_thread(current_thread(), cpu); +} + +// DEPRECATED +kern_return_t chudxnu_unbind_current_thread(void) +{ + return chudxnu_unbind_thread(current_thread()); +} + +// DEPRECATED +__private_extern__ +kern_return_t chudxnu_current_thread_get_callstack( + uint32_t *callStack, + mach_msg_type_number_t *count, + boolean_t user_only) +{ + return chudxnu_thread_get_callstack( + current_thread(), callStack, count, user_only); +} + +// DEPRECATED +__private_extern__ +thread_t chudxnu_current_act(void) +{ + return chudxnu_current_thread(); +} diff --git a/osfmk/ppc/chud/chud_xnu_glue.h b/osfmk/chud/i386/chud_xnu_glue.h similarity index 100% 
rename from osfmk/ppc/chud/chud_xnu_glue.h rename to osfmk/chud/i386/chud_xnu_glue.h diff --git a/osfmk/chud/i386/chud_xnu_private.h b/osfmk/chud/i386/chud_xnu_private.h new file mode 100644 index 000000000..5fe82ee2a --- /dev/null +++ b/osfmk/chud/i386/chud_xnu_private.h @@ -0,0 +1,54 @@ +/* + * Copyright (c) 2005 Apple Computer, Inc. All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * The contents of this file constitute Original Code as defined in and + * are subject to the Apple Public Source License Version 1.1 (the + * "License"). You may not use this file except in compliance with the + * License. Please obtain a copy of the License at + * http://www.apple.com/publicsource and read it before using this file. + * + * This Original Code and all software distributed under the License are + * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the + * License for the specific language governing rights and limitations + * under the License. + * + * @APPLE_LICENSE_HEADER_END@ + */ + +#ifndef _I386_CHUD_XNU_PRIVATE_H_ +#define _I386_CHUD_XNU_PRIVATE_H_ + +#include + +#pragma mark **** cpu timer **** + +/* + * Cross-cpu signal request entries are queued on the target cpu's + * chudcpu_data_t struct. This differs from PPC because i386 doesn't + * support sending arguments with cross-cpu signals. Hence we have + * to do it ourselves. 
+ */ +typedef struct { + struct queue_entry req_entry; /* Must be first */ + uint32_t req_type; + uint32_t req_code; + volatile uint32_t req_sync; +} chudcpu_signal_request_t; + +typedef struct { + void *cpu_chud_fn_tablep; + timer_call_data_t cpu_timer_call; + uint64_t t_deadline; + chudxnu_cpu_timer_callback_func_t cpu_timer_callback_fn; + mpqueue_head_t cpu_request_queue; +} chudcpu_data_t; +/* NB: cpu_chud_fn_tablep is expected to be the first member, at offset 0 */ + +extern void chudxnu_cpu_signal_handler(void); + +#endif /* _I386_CHUD_XNU_PRIVATE_H_ */ diff --git a/osfmk/ppc/chud/chud_cpu_asm.h b/osfmk/chud/ppc/chud_cpu_asm.h similarity index 100% rename from osfmk/ppc/chud/chud_cpu_asm.h rename to osfmk/chud/ppc/chud_cpu_asm.h diff --git a/osfmk/ppc/chud/chud_cpu_asm.s b/osfmk/chud/ppc/chud_cpu_asm.s similarity index 99% rename from osfmk/ppc/chud/chud_cpu_asm.s rename to osfmk/chud/ppc/chud_cpu_asm.s index 4bf583034..f9e6cc4f1 100644 --- a/osfmk/ppc/chud/chud_cpu_asm.s +++ b/osfmk/chud/ppc/chud_cpu_asm.s @@ -21,7 +21,7 @@ */ #define ASSEMBLER -#include +#include #include #include diff --git a/osfmk/ppc/chud/chud_cpu.c b/osfmk/chud/ppc/chud_cpu_ppc.c similarity index 96% rename from osfmk/ppc/chud/chud_cpu.c rename to osfmk/chud/ppc/chud_cpu_ppc.c index faed408a4..6bd2df97b 100644 --- a/osfmk/ppc/chud/chud_cpu.c +++ b/osfmk/chud/ppc/chud_cpu_ppc.c @@ -25,9 +25,9 @@ #include #include -#include -#include -#include +#include +#include +#include #include #include #include @@ -50,44 +50,6 @@ #define mtsr(sr, reg) __asm__ volatile("sync" "@" "mtsr sr%0, %1 " "@" "isync" : : "i" (sr), "r" (reg)); #define mfsr(reg, sr) __asm__ volatile("mfsr %0, sr%1" : "=r" (reg) : "i" (sr)); -#pragma mark **** cpu count **** - -__private_extern__ -int chudxnu_avail_cpu_count(void) -{ - host_basic_info_data_t hinfo; - kern_return_t kr; - mach_msg_type_number_t count = HOST_BASIC_INFO_COUNT; - - kr = host_info(host_self(), HOST_BASIC_INFO, (integer_t *)&hinfo, &count); - if(kr == 
KERN_SUCCESS) { - return hinfo.avail_cpus; - } else { - return 0; - } -} - -__private_extern__ -int chudxnu_phys_cpu_count(void) -{ - host_basic_info_data_t hinfo; - kern_return_t kr; - mach_msg_type_number_t count = HOST_BASIC_INFO_COUNT; - - kr = host_info(host_self(), HOST_BASIC_INFO, (integer_t *)&hinfo, &count); - if(kr == KERN_SUCCESS) { - return hinfo.max_cpus; - } else { - return 0; - } -} - -__private_extern__ -int chudxnu_cpu_number(void) -{ - return cpu_number(); -} - #pragma mark **** cpu enable/disable **** extern kern_return_t processor_start(processor_t processor); // osfmk/kern/processor.c @@ -1141,45 +1103,6 @@ kern_return_t chudxnu_perfmon_release_facility(task_t task) return perfmon_release_facility(task); } -#pragma mark **** branch trace buffer **** - -extern int pc_trace_buf[1024]; - -__private_extern__ -uint32_t * chudxnu_get_branch_trace_buffer(uint32_t *entries) -{ - if(entries) { - *entries = sizeof(pc_trace_buf)/sizeof(int); - } - return pc_trace_buf; -} - -#pragma mark **** interrupts enable/disable **** - -__private_extern__ -boolean_t chudxnu_get_interrupts_enabled(void) -{ - return ml_get_interrupts_enabled(); -} - -__private_extern__ -boolean_t chudxnu_set_interrupts_enabled(boolean_t enable) -{ - return ml_set_interrupts_enabled(enable); -} - -__private_extern__ -boolean_t chudxnu_at_interrupt_context(void) -{ - return ml_at_interrupt_context(); -} - -__private_extern__ -void chudxnu_cause_interrupt(void) -{ - ml_cause_interrupt(); -} - #pragma mark **** rupt counters **** __private_extern__ diff --git a/osfmk/ppc/chud/chud_osfmk_callback.c b/osfmk/chud/ppc/chud_osfmk_callback_ppc.c similarity index 72% rename from osfmk/ppc/chud/chud_osfmk_callback.c rename to osfmk/chud/ppc/chud_osfmk_callback_ppc.c index e0ccc2012..54177dc3e 100644 --- a/osfmk/ppc/chud/chud_osfmk_callback.c +++ b/osfmk/chud/ppc/chud_osfmk_callback_ppc.c @@ -26,7 +26,6 @@ #include #include -#include #include #include #include @@ -38,8 +37,8 @@ #include #include 
-#include -#include +#include +#include __private_extern__ void chudxnu_cancel_all_callbacks(void) @@ -54,13 +53,6 @@ void chudxnu_cancel_all_callbacks(void) chudxnu_syscall_callback_cancel(); } -#pragma mark **** cpu timer **** -typedef struct { - timer_call_data_t cpu_timer_call; - uint64_t t_deadline; - chudxnu_cpu_timer_callback_func_t cpu_timer_callback_fn; -} chudcpu_data_t; - static chudcpu_data_t chudcpu_boot_cpu; void *chudxnu_per_proc_alloc(boolean_t boot_processor) @@ -95,14 +87,16 @@ static void chudxnu_private_cpu_timer_callback(timer_call_param_t param0, timer_ boolean_t oldlevel; struct ppc_thread_state64 state; mach_msg_type_number_t count; + chudxnu_cpu_timer_callback_func_t fn = NULL; oldlevel = ml_set_interrupts_enabled(FALSE); chud_proc_info = (chudcpu_data_t *)(getPerProc()->pp_chud); count = PPC_THREAD_STATE64_COUNT; if(chudxnu_thread_get_state(current_thread(), PPC_THREAD_STATE64, (thread_state_t)&state, &count, FALSE)==KERN_SUCCESS) { - if(chud_proc_info->cpu_timer_callback_fn) { - (chud_proc_info->cpu_timer_callback_fn)(PPC_THREAD_STATE64, (thread_state_t)&state, count); + fn = chud_proc_info->cpu_timer_callback_fn; + if(fn) { + (fn)(PPC_THREAD_STATE64, (thread_state_t)&state, count); } } @@ -203,13 +197,14 @@ static kern_return_t chudxnu_private_trap_callback(int trapno, struct savearea * boolean_t oldlevel = ml_set_interrupts_enabled(FALSE); kern_return_t retval = KERN_FAILURE; uint32_t trapentry = TRAP_ENTRY_POINT(trapno); + chudxnu_trap_callback_func_t fn = trap_callback_fn; if(trapentry!=0x0) { - if(trap_callback_fn) { + if(fn) { struct ppc_thread_state64 state; mach_msg_type_number_t count = PPC_THREAD_STATE64_COUNT; chudxnu_copy_savearea_to_threadstate(PPC_THREAD_STATE64, (thread_state_t)&state, &count, ssp); - retval = (trap_callback_fn)(trapentry, PPC_THREAD_STATE64, (thread_state_t)&state, count); + retval = (fn)(trapentry, PPC_THREAD_STATE64, (thread_state_t)&state, count); } } @@ -246,40 +241,41 @@ static kern_return_t 
chudxnu_private_chud_ast_callback(int trapno, struct savear boolean_t oldlevel = ml_set_interrupts_enabled(FALSE); ast_t *myast = ast_pending(); kern_return_t retval = KERN_FAILURE; + chudxnu_perfmon_ast_callback_func_t fn = perfmon_ast_callback_fn; - if(*myast & AST_PPC_CHUD_URGENT) { - *myast &= ~(AST_PPC_CHUD_URGENT | AST_PPC_CHUD); + if(*myast & AST_CHUD_URGENT) { + *myast &= ~(AST_CHUD_URGENT | AST_CHUD); if((*myast & AST_PREEMPTION) != AST_PREEMPTION) *myast &= ~(AST_URGENT); retval = KERN_SUCCESS; - } else if(*myast & AST_PPC_CHUD) { - *myast &= ~(AST_PPC_CHUD); + } else if(*myast & AST_CHUD) { + *myast &= ~(AST_CHUD); retval = KERN_SUCCESS; } - if(perfmon_ast_callback_fn) { + if(fn) { struct ppc_thread_state64 state; mach_msg_type_number_t count; count = PPC_THREAD_STATE64_COUNT; if(chudxnu_thread_get_state(current_thread(), PPC_THREAD_STATE64, (thread_state_t)&state, &count, FALSE)==KERN_SUCCESS) { - (perfmon_ast_callback_fn)(PPC_THREAD_STATE64, (thread_state_t)&state, count); + (fn)(PPC_THREAD_STATE64, (thread_state_t)&state, count); } } #if 0 // ASTs from ihandler go through thandler and are made to look like traps - // always handle AST_PPC_CHUD_URGENT if there's a callback - // only handle AST_PPC_CHUD if it's the only AST pending - if(perfmon_ast_callback_fn && ((*myast & AST_PPC_CHUD_URGENT) || ((*myast & AST_PPC_CHUD) && !(*myast & AST_URGENT)))) { + // always handle AST_CHUD_URGENT if there's a callback + // only handle AST_CHUD if it's the only AST pending + if(perfmon_ast_callback_fn && ((*myast & AST_CHUD_URGENT) || ((*myast & AST_CHUD) && !(*myast & AST_URGENT)))) { struct ppc_thread_state64 state; mach_msg_type_number_t count = PPC_THREAD_STATE64_COUNT; chudxnu_copy_savearea_to_threadstate(PPC_THREAD_STATE64, (thread_state_t)&state, &count, ssp); - if(*myast & AST_PPC_CHUD_URGENT) { - *myast &= ~(AST_PPC_CHUD_URGENT | AST_PPC_CHUD); + if(*myast & AST_CHUD_URGENT) { + *myast &= ~(AST_CHUD_URGENT | AST_CHUD); if((*myast & AST_PREEMPTION) != 
AST_PREEMPTION) *myast &= ~(AST_URGENT); retval = KERN_SUCCESS; - } else if(*myast & AST_PPC_CHUD) { - *myast &= ~(AST_PPC_CHUD); + } else if(*myast & AST_CHUD) { + *myast &= ~(AST_CHUD); retval = KERN_SUCCESS; } (perfmon_ast_callback_fn)(PPC_THREAD_STATE64, (thread_state_t)&state, count); @@ -317,9 +313,9 @@ kern_return_t chudxnu_perfmon_ast_send_urgent(boolean_t urgent) ast_t *myast = ast_pending(); if(urgent) { - *myast |= (AST_PPC_CHUD_URGENT | AST_URGENT); + *myast |= (AST_CHUD_URGENT | AST_URGENT); } else { - *myast |= (AST_PPC_CHUD); + *myast |= (AST_CHUD); } ml_set_interrupts_enabled(oldlevel); @@ -338,11 +334,13 @@ static chudxnu_interrupt_callback_func_t interrupt_callback_fn = NULL; static kern_return_t chudxnu_private_interrupt_callback(int trapno, struct savearea *ssp, unsigned int dsisr, unsigned int dar) { - if(interrupt_callback_fn) { + chudxnu_interrupt_callback_func_t fn = interrupt_callback_fn; + + if(fn) { struct ppc_thread_state64 state; mach_msg_type_number_t count = PPC_THREAD_STATE64_COUNT; chudxnu_copy_savearea_to_threadstate(PPC_THREAD_STATE64, (thread_state_t)&state, &count, ssp); - return (interrupt_callback_fn)(TRAP_ENTRY_POINT(trapno), PPC_THREAD_STATE64, (thread_state_t)&state, count); + return (fn)(TRAP_ENTRY_POINT(trapno), PPC_THREAD_STATE64, (thread_state_t)&state, count); } else { return KERN_FAILURE; } @@ -374,11 +372,13 @@ extern perfCallback perfCpuSigHook; /* function hook into cpu_signal_handler() * static kern_return_t chudxnu_private_cpu_signal_handler(int request, struct savearea *ssp, unsigned int arg0, unsigned int arg1) { - if(cpusig_callback_fn) { + chudxnu_cpusig_callback_func_t fn = cpusig_callback_fn; + + if(fn) { struct ppc_thread_state64 state; mach_msg_type_number_t count = PPC_THREAD_STATE64_COUNT; chudxnu_copy_savearea_to_threadstate(PPC_THREAD_STATE64, (thread_state_t)&state, &count, ssp); - (cpusig_callback_fn)(request, PPC_THREAD_STATE64, (thread_state_t)&state, count); + (fn)(request, PPC_THREAD_STATE64, 
(thread_state_t)&state, count); } return KERN_SUCCESS; // ignored } @@ -442,124 +442,3 @@ kern_return_t chudxnu_cpusig_send(int otherCPU, uint32_t request) ml_set_interrupts_enabled(oldlevel); return retval; } - -#pragma mark **** timer **** -__private_extern__ -chud_timer_t chudxnu_timer_alloc(chudxnu_timer_callback_func_t func, uint32_t param0) -{ - return (chud_timer_t)thread_call_allocate((thread_call_func_t)func, (thread_call_param_t)param0); -} - -__private_extern__ -kern_return_t chudxnu_timer_callback_enter(chud_timer_t timer, uint32_t param1, uint32_t time, uint32_t units) -{ - uint64_t t_delay; - clock_interval_to_deadline(time, units, &t_delay); - thread_call_enter1_delayed((thread_call_t)timer, (thread_call_param_t)param1, t_delay); - return KERN_SUCCESS; -} - -__private_extern__ -kern_return_t chudxnu_timer_callback_cancel(chud_timer_t timer) -{ - thread_call_cancel((thread_call_t)timer); - return KERN_SUCCESS; -} - -__private_extern__ -kern_return_t chudxnu_timer_free(chud_timer_t timer) -{ - thread_call_cancel((thread_call_t)timer); - thread_call_free((thread_call_t)timer); - return KERN_SUCCESS; -} - -#pragma mark **** CHUD syscall (PPC) **** - -typedef int (*PPCcallEnt)(struct savearea *save); -extern PPCcallEnt PPCcalls[]; - -static chudxnu_syscall_callback_func_t syscall_callback_fn = NULL; - -static int chudxnu_private_syscall_callback(struct savearea *ssp) -{ - if(ssp) { - if(syscall_callback_fn) { - struct ppc_thread_state64 state; - kern_return_t retval; - mach_msg_type_number_t count = PPC_THREAD_STATE64_COUNT; - chudxnu_copy_savearea_to_threadstate(PPC_THREAD_STATE64, (thread_state_t)&state, &count, ssp); - ssp->save_r3 = (syscall_callback_fn)(PPC_THREAD_STATE64, (thread_state_t)&state, count); - } else { - ssp->save_r3 = KERN_FAILURE; - } - } - - return 1; // check for ASTs (always) -} - -__private_extern__ -kern_return_t chudxnu_syscall_callback_enter(chudxnu_syscall_callback_func_t func) -{ - syscall_callback_fn = func; - PPCcalls[9] = 
chudxnu_private_syscall_callback; - __asm__ volatile("eieio"); /* force order */ - __asm__ volatile("sync"); /* force to memory */ - return KERN_SUCCESS; -} - -__private_extern__ -kern_return_t chudxnu_syscall_callback_cancel(void) -{ - syscall_callback_fn = NULL; - PPCcalls[9] = NULL; - __asm__ volatile("eieio"); /* force order */ - __asm__ volatile("sync"); /* force to memory */ - return KERN_SUCCESS; -} - -#pragma mark **** thread timer - DEPRECATED **** - -static thread_call_t thread_timer_call = NULL; -static chudxnu_thread_timer_callback_func_t thread_timer_callback_fn = NULL; - -static void chudxnu_private_thread_timer_callback(thread_call_param_t param0, thread_call_param_t param1) -{ - if(thread_timer_call) { - thread_call_free(thread_timer_call); - thread_timer_call = NULL; - - if(thread_timer_callback_fn) { - (thread_timer_callback_fn)((uint32_t)param0); - } - } -} - -// DEPRECATED -__private_extern__ -kern_return_t chudxnu_thread_timer_callback_enter(chudxnu_thread_timer_callback_func_t func, uint32_t param, uint32_t time, uint32_t units) -{ - if(!thread_timer_call) { - uint64_t t_delay; - thread_timer_callback_fn = func; - thread_timer_call = thread_call_allocate((thread_call_func_t)chudxnu_private_thread_timer_callback, (thread_call_param_t)param); - clock_interval_to_deadline(time, units, &t_delay); - thread_call_enter_delayed(thread_timer_call, t_delay); - return KERN_SUCCESS; - } else { - return KERN_FAILURE; // thread timer call already pending - } -} - -// DEPRECATED -__private_extern__ -kern_return_t chudxnu_thread_timer_callback_cancel(void) -{ - if(thread_timer_call) { - thread_call_cancel(thread_timer_call); - thread_call_free(thread_timer_call); - thread_timer_call = NULL; - } - thread_timer_callback_fn = NULL; - return KERN_SUCCESS; -} diff --git a/osfmk/ppc/chud/chud_spr.h b/osfmk/chud/ppc/chud_spr.h similarity index 100% rename from osfmk/ppc/chud/chud_spr.h rename to osfmk/chud/ppc/chud_spr.h diff --git a/osfmk/ppc/chud/chud_thread.c 
b/osfmk/chud/ppc/chud_thread_ppc.c similarity index 75% rename from osfmk/ppc/chud/chud_thread.c rename to osfmk/chud/ppc/chud_thread_ppc.c index b0fe7a94d..170a98bd9 100644 --- a/osfmk/ppc/chud/chud_thread.c +++ b/osfmk/chud/ppc/chud_thread_ppc.c @@ -32,8 +32,8 @@ #include #include -#include -#include +#include +#include #include #include @@ -47,29 +47,6 @@ extern kern_return_t machine_thread_get_kern_state( thread_t thre mach_msg_type_number_t *count); -#pragma mark **** thread binding **** - -__private_extern__ -kern_return_t chudxnu_bind_thread(thread_t thread, int cpu) -{ - if(cpu>=0 && cpu=0x1000ULL && (addr&STACK_ALIGNMENT_MASK)==0x0 && (supervisor ? (addr>=kernStackMin && addr<=kernStackMax) : TRUE)) +#define VALID_STACK_ADDRESS(addr) (addr>=0x1000ULL && \ + (addr&STACK_ALIGNMENT_MASK)==0x0 && \ + (supervisor ? \ + (addr>=kernStackMin && \ + addr<=kernStackMax) : \ + TRUE)) __private_extern__ @@ -799,362 +781,6 @@ kern_return_t chudxnu_thread_get_callstack( thread_t thread, return KERN_SUCCESS; } -#pragma mark **** task and thread info **** - -__private_extern__ -boolean_t chudxnu_is_64bit_task(task_t task) -{ - return (task_has_64BitAddr(task)); -} - -#define THING_TASK 0 -#define THING_THREAD 1 - -// an exact copy of processor_set_things() except no mig conversion at the end! -static kern_return_t chudxnu_private_processor_set_things( processor_set_t pset, - mach_port_t **thing_list, - mach_msg_type_number_t *count, - int type) -{ - unsigned int actual; /* this many things */ - unsigned int maxthings; - unsigned int i; - - vm_size_t size, size_needed; - void *addr; - - if (pset == PROCESSOR_SET_NULL) - return (KERN_INVALID_ARGUMENT); - - size = 0; addr = 0; - - for (;;) { - pset_lock(pset); - if (!pset->active) { - pset_unlock(pset); - - return (KERN_FAILURE); - } - - if (type == THING_TASK) - maxthings = pset->task_count; - else - maxthings = pset->thread_count; - - /* do we have the memory we need? 
*/ - - size_needed = maxthings * sizeof (mach_port_t); - if (size_needed <= size) - break; - - /* unlock the pset and allocate more memory */ - pset_unlock(pset); - - if (size != 0) - kfree(addr, size); - - assert(size_needed > 0); - size = size_needed; - - addr = kalloc(size); - if (addr == 0) - return (KERN_RESOURCE_SHORTAGE); - } - - /* OK, have memory and the processor_set is locked & active */ - - actual = 0; - switch (type) { - - case THING_TASK: - { - task_t task, *tasks = (task_t *)addr; - - for (task = (task_t)queue_first(&pset->tasks); - !queue_end(&pset->tasks, (queue_entry_t)task); - task = (task_t)queue_next(&task->pset_tasks)) { - task_reference_internal(task); - tasks[actual++] = task; - } - - break; - } - - case THING_THREAD: - { - thread_t thread, *threads = (thread_t *)addr; - - for (i = 0, thread = (thread_t)queue_first(&pset->threads); - !queue_end(&pset->threads, (queue_entry_t)thread); - thread = (thread_t)queue_next(&thread->pset_threads)) { - thread_reference_internal(thread); - threads[actual++] = thread; - } - - break; - } - } - - pset_unlock(pset); - - if (actual < maxthings) - size_needed = actual * sizeof (mach_port_t); - - if (actual == 0) { - /* no things, so return null pointer and deallocate memory */ - *thing_list = 0; - *count = 0; - - if (size != 0) - kfree(addr, size); - } - else { - /* if we allocated too much, must copy */ - - if (size_needed < size) { - void *newaddr; - - newaddr = kalloc(size_needed); - if (newaddr == 0) { - switch (type) { - - case THING_TASK: - { - task_t *tasks = (task_t *)addr; - - for (i = 0; i < actual; i++) - task_deallocate(tasks[i]); - break; - } - - case THING_THREAD: - { - thread_t *threads = (thread_t *)addr; - - for (i = 0; i < actual; i++) - thread_deallocate(threads[i]); - break; - } - } - - kfree(addr, size); - return (KERN_RESOURCE_SHORTAGE); - } - - bcopy((void *) addr, (void *) newaddr, size_needed); - kfree(addr, size); - addr = newaddr; - } - - *thing_list = (mach_port_t *)addr; - *count 
= actual; - } - - return (KERN_SUCCESS); -} - -// an exact copy of task_threads() except no mig conversion at the end! -static kern_return_t chudxnu_private_task_threads(task_t task, - thread_act_array_t *threads_out, - mach_msg_type_number_t *count) -{ - mach_msg_type_number_t actual; - thread_t *threads; - thread_t thread; - vm_size_t size, size_needed; - void *addr; - unsigned int i, j; - - if (task == TASK_NULL) - return (KERN_INVALID_ARGUMENT); - - size = 0; addr = 0; - - for (;;) { - task_lock(task); - if (!task->active) { - task_unlock(task); - - if (size != 0) - kfree(addr, size); - - return (KERN_FAILURE); - } - - actual = task->thread_count; - - /* do we have the memory we need? */ - size_needed = actual * sizeof (mach_port_t); - if (size_needed <= size) - break; - - /* unlock the task and allocate more memory */ - task_unlock(task); - - if (size != 0) - kfree(addr, size); - - assert(size_needed > 0); - size = size_needed; - - addr = kalloc(size); - if (addr == 0) - return (KERN_RESOURCE_SHORTAGE); - } - - /* OK, have memory and the task is locked & active */ - threads = (thread_t *)addr; - - i = j = 0; - - for (thread = (thread_t)queue_first(&task->threads); i < actual; - ++i, thread = (thread_t)queue_next(&thread->task_threads)) { - thread_reference_internal(thread); - threads[j++] = thread; - } - - assert(queue_end(&task->threads, (queue_entry_t)thread)); - - actual = j; - size_needed = actual * sizeof (mach_port_t); - - /* can unlock task now that we've got the thread refs */ - task_unlock(task); - - if (actual == 0) { - /* no threads, so return null pointer and deallocate memory */ - - *threads_out = 0; - *count = 0; - - if (size != 0) - kfree(addr, size); - } - else { - /* if we allocated too much, must copy */ - - if (size_needed < size) { - void *newaddr; - - newaddr = kalloc(size_needed); - if (newaddr == 0) { - for (i = 0; i < actual; ++i) - thread_deallocate(threads[i]); - kfree(addr, size); - return (KERN_RESOURCE_SHORTAGE); - } - - 
bcopy(addr, newaddr, size_needed); - kfree(addr, size); - threads = (thread_t *)newaddr; - } - - *threads_out = threads; - *count = actual; - } - - return (KERN_SUCCESS); -} - - -__private_extern__ -kern_return_t chudxnu_all_tasks(task_array_t *task_list, - mach_msg_type_number_t *count) -{ - return chudxnu_private_processor_set_things(&default_pset, (mach_port_t **)task_list, count, THING_TASK); -} - -__private_extern__ -kern_return_t chudxnu_free_task_list(task_array_t *task_list, - mach_msg_type_number_t *count) -{ - vm_size_t size = (*count)*sizeof(mach_port_t); - void *addr = *task_list; - - if(addr) { - int i, maxCount = *count; - for(i=0; ilast_switch; - return KERN_SUCCESS; -} - #pragma mark **** DEPRECATED **** // DEPRECATED diff --git a/osfmk/chud/ppc/chud_xnu_glue.h b/osfmk/chud/ppc/chud_xnu_glue.h new file mode 100644 index 000000000..ba3aa48a9 --- /dev/null +++ b/osfmk/chud/ppc/chud_xnu_glue.h @@ -0,0 +1,22 @@ +/* + * Copyright (c) 2003 Apple Computer, Inc. All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * The contents of this file constitute Original Code as defined in and + * are subject to the Apple Public Source License Version 1.1 (the + * "License"). You may not use this file except in compliance with the + * License. Please obtain a copy of the License at + * http://www.apple.com/publicsource and read it before using this file. + * + * This Original Code and all software distributed under the License are + * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the + * License for the specific language governing rights and limitations + * under the License. 
+ * + * @APPLE_LICENSE_HEADER_END@ + */ + diff --git a/osfmk/ppc/chud/chud_xnu_private.h b/osfmk/chud/ppc/chud_xnu_private.h similarity index 64% rename from osfmk/ppc/chud/chud_xnu_private.h rename to osfmk/chud/ppc/chud_xnu_private.h index eeeaa92f2..d41d672c6 100644 --- a/osfmk/ppc/chud/chud_xnu_private.h +++ b/osfmk/chud/ppc/chud_xnu_private.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2003 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2005 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -23,22 +23,27 @@ #ifndef _PPC_CHUD_XNU_PRIVATE_H_ #define _PPC_CHUD_XNU_PRIVATE_H_ -#include -#include -#include - #pragma mark **** thread **** -// ******************************************************************************** +// ***************************************************************************** // thread -// ******************************************************************************** -extern kern_return_t chudxnu_copy_savearea_to_threadstate( thread_flavor_t flavor, - thread_state_t tstate, - mach_msg_type_number_t *count, - struct savearea *sv); +// ***************************************************************************** +extern kern_return_t chudxnu_copy_savearea_to_threadstate( + thread_flavor_t flavor, + thread_state_t tstate, + mach_msg_type_number_t *count, + struct savearea *sv); -extern kern_return_t chudxnu_copy_threadstate_to_savearea( struct savearea *sv, - thread_flavor_t flavor, - thread_state_t tstate, - mach_msg_type_number_t *count); +extern kern_return_t chudxnu_copy_threadstate_to_savearea( + struct savearea *sv, + thread_flavor_t flavor, + thread_state_t tstate, + mach_msg_type_number_t *count); + +#pragma mark **** cpu timer **** +typedef struct { + timer_call_data_t cpu_timer_call; + uint64_t t_deadline; + chudxnu_cpu_timer_callback_func_t cpu_timer_callback_fn; +} chudcpu_data_t; #endif /* _PPC_CHUD_XNU_PRIVATE_H_ */ diff --git a/osfmk/conf/MASTER.i386 b/osfmk/conf/MASTER.i386 index 
1dc361d30..1e9f170b6 100644 --- a/osfmk/conf/MASTER.i386 +++ b/osfmk/conf/MASTER.i386 @@ -12,7 +12,7 @@ # osfmk = [intel pc mach small event vol pst gdb fixpri simple_clock mkernserv uxpr kernstack ipc_compat ipc_debug mk30 mk30_i386] # RELEASE = [intel pc iokit mach_pe mach mach_kdp small event vol hd pst gdb fixpri simple_clock mkernserv uxpr kernstack ipc_compat ipc_debug fb mk30 mk30_i386] # DEBUG_KDP = [intel pc iokit mach_pe mach mach_kdp small event vol hd pst gdb fixpri simple_clock mkernserv uxpr kernstack ipc_compat ipc_debug fb mk30 mk30_i386 osf_debug debug] -# DEBUG= [intel pc iokit mach_pe mach mach_kdp small event vol hd pst gdb fixpri simple_clock mkernserv uxpr kernstack ipc_compat ipc_debug fb mk30 mk30_i386 osf_debug debug] +# DEBUG= [intel pc iokit mach_pe mach mach_kdp small event vol hd pst gdb fixpri simple_clock mkernserv uxpr kernstack ipc_compat ipc_debug fb mk30 mk30_i386 osf_debug debug mach_kdb] # PROFILE = [ RELEASE profile ] # ###################################################################### @@ -48,7 +48,8 @@ options MACH_BSD options IOKIT # # options MACH_PE # # -#options DDB # Inline debugger # +options DDB # Inline debugger # +options MACH_KDB # # options MACH_KDP # KDP # -#options PAE - +options PAE +options X86_64 diff --git a/osfmk/conf/Makefile b/osfmk/conf/Makefile index 38fc57eba..212081efc 100644 --- a/osfmk/conf/Makefile +++ b/osfmk/conf/Makefile @@ -90,8 +90,8 @@ export ipc_table.o_CFLAGS_RM=$(CWARNFLAGS_STD) export ipc_table.o_CFLAGS_ADD=-Werror $(CWARNFLAGS_STD) export mach_debug.o_CFLAGS_RM=$(CWARNFLAGS_STD) export mach_debug.o_CFLAGS_ADD=-Werror $(CWARNFLAGS_STD) -export mach_msg.o_CFLAGS_RM=$(CWARNFLAGS_STD) -export mach_msg.o_CFLAGS_ADD=-Werror $(CWARNFLAGS_STD) +#export mach_msg.o_CFLAGS_RM=$(CWARNFLAGS_STD) +#export mach_msg.o_CFLAGS_ADD=-Werror $(CWARNFLAGS_STD) export mach_port.o_CFLAGS_RM=$(CWARNFLAGS_STD) export mach_port.o_CFLAGS_ADD=-Werror $(CWARNFLAGS_STD) export mig_log.o_CFLAGS_RM=$(CWARNFLAGS_STD) 
@@ -100,24 +100,24 @@ export ipc_clock.o_CFLAGS_RM=$(CWARNFLAGS_STD) export ipc_clock.o_CFLAGS_ADD=-Werror $(CWARNFLAGS_STD) export ipc_host.o_CFLAGS_RM=$(CWARNFLAGS_STD) export ipc_host.o_CFLAGS_ADD=-Werror $(CWARNFLAGS_STD) -export ipc_kobject.o_CFLAGS_RM=$(CWARNFLAGS_STD) -export ipc_kobject.o_CFLAGS_ADD=-Werror $(CWARNFLAGS_STD) +#export ipc_kobject.o_CFLAGS_RM=$(CWARNFLAGS_STD) +#export ipc_kobject.o_CFLAGS_ADD=-Werror $(CWARNFLAGS_STD) export ipc_mig.o_CFLAGS_RM=$(CWARNFLAGS_STD) export ipc_mig.o_CFLAGS_ADD=-Werror $(CWARNFLAGS_STD) export ipc_sync.o_CFLAGS_RM=$(CWARNFLAGS_STD) export ipc_sync.o_CFLAGS_ADD=-Werror $(CWARNFLAGS_STD) -export ipc_tt.o_CFLAGS_RM=$(CWARNFLAGS_STD) -export ipc_tt.o_CFLAGS_ADD=-Werror $(CWARNFLAGS_STD) -export sync_lock.o_CFLAGS_RM=$(CWARNFLAGS_STD) -export sync_lock.o_CFLAGS_ADD=-Werror $(CWARNFLAGS_STD) +#export ipc_tt.o_CFLAGS_RM=$(CWARNFLAGS_STD) +#export ipc_tt.o_CFLAGS_ADD=-Werror $(CWARNFLAGS_STD) +#export sync_lock.o_CFLAGS_RM=$(CWARNFLAGS_STD) +#export sync_lock.o_CFLAGS_ADD=-Werror $(CWARNFLAGS_STD) export sync_sema.o_CFLAGS_RM=$(CWARNFLAGS_STD) export sync_sema.o_CFLAGS_ADD=-Werror $(CWARNFLAGS_STD) -export mach_port_server.o_CFLAGS_RM=$(CWARNFLAGS_STD) -export mach_port_server.o_CFLAGS_ADD=-Werror $(CWARNFLAGS_STD) -export lock_set_server.o_CFLAGS_RM=$(CWARNFLAGS_STD) -export lock_set_server.o_CFLAGS_ADD=-Werror $(CWARNFLAGS_STD) -export semaphore_server.o_CFLAGS_RM=$(CWARNFLAGS_STD) -export semaphore_server.o_CFLAGS_ADD=-Werror $(CWARNFLAGS_STD) +#export mach_port_server.o_CFLAGS_RM=$(CWARNFLAGS_STD) +#export mach_port_server.o_CFLAGS_ADD=-Werror $(CWARNFLAGS_STD) +#export lock_set_server.o_CFLAGS_RM=$(CWARNFLAGS_STD) +#export lock_set_server.o_CFLAGS_ADD=-Werror $(CWARNFLAGS_STD) +#export semaphore_server.o_CFLAGS_RM=$(CWARNFLAGS_STD) +#export semaphore_server.o_CFLAGS_ADD=-Werror $(CWARNFLAGS_STD) # # kern should be warning free (almost) @@ -157,86 +157,86 @@ export mach_clock.o_CFLAGS_RM=$(CWARNFLAGS_STD) export 
mach_clock.o_CFLAGS_ADD=-Werror $(CWARNFLAGS_STD) export mach_factor.o_CFLAGS_RM=$(CWARNFLAGS_STD) export mach_factor.o_CFLAGS_ADD=-Werror $(CWARNFLAGS_STD) -export machine.o_CFLAGS_RM=$(CWARNFLAGS_STD) -export machine.o_CFLAGS_ADD=-Werror $(CWARNFLAGS_STD) +#export machine.o_CFLAGS_RM=$(CWARNFLAGS_STD) +#export machine.o_CFLAGS_ADD=-Werror $(CWARNFLAGS_STD) export mk_timer.o_CFLAGS_RM=$(CWARNFLAGS_STD) export mk_timer.o_CFLAGS_ADD=-Werror $(CWARNFLAGS_STD) export profile.o_CFLAGS_RM=$(CWARNFLAGS_STD) export profile.o_CFLAGS_ADD=-Werror $(CWARNFLAGS_STD) export priority.o_CFLAGS_RM=$(CWARNFLAGS_STD) export priority.o_CFLAGS_ADD=-Werror $(CWARNFLAGS_STD) -export processor.o_CFLAGS_RM=$(CWARNFLAGS_STD) -export processor.o_CFLAGS_ADD=-Werror $(CWARNFLAGS_STD) +#export processor.o_CFLAGS_RM=$(CWARNFLAGS_STD) +#export processor.o_CFLAGS_ADD=-Werror $(CWARNFLAGS_STD) export processor_data.o_CFLAGS_RM=$(CWARNFLAGS_STD) export processor_data.o_CFLAGS_ADD=-Werror $(CWARNFLAGS_STD) export queue.o_CFLAGS_RM=$(CWARNFLAGS_STD) export queue.o_CFLAGS_ADD=-Werror $(CWARNFLAGS_STD) -export sched_prim.o_CFLAGS_RM=$(CWARNFLAGS_STD) -export sched_prim.o_CFLAGS_ADD=-Werror $(CWARNFLAGS_STD) +#export sched_prim.o_CFLAGS_RM=$(CWARNFLAGS_STD) +#export sched_prim.o_CFLAGS_ADD=-Werror $(CWARNFLAGS_STD) export sscanf.o_CFLAGS_RM=$(CWARNFLAGS_STD) export sscanf.o_CFLAGS_ADD=-Werror $(CWARNFLAGS_STD) -export stack.o_CFLAGS_RM=$(CWARNFLAGS_STD) -export stack.o_CFLAGS_ADD=-Werror $(CWARNFLAGS_STD) +#export stack.o_CFLAGS_RM=$(CWARNFLAGS_STD) +#export stack.o_CFLAGS_ADD=-Werror $(CWARNFLAGS_STD) export startup.o_CFLAGS_RM=$(CWARNFLAGS_STD) export startup.o_CFLAGS_ADD=-Werror $(CWARNFLAGS_STD) -export syscall_subr.o_CFLAGS_RM=$(CWARNFLAGS_STD) -export syscall_subr.o_CFLAGS_ADD=-Werror $(CWARNFLAGS_STD) +#export syscall_subr.o_CFLAGS_RM=$(CWARNFLAGS_STD) +#export syscall_subr.o_CFLAGS_ADD=-Werror $(CWARNFLAGS_STD) export syscall_sw.o_CFLAGS_RM=$(CWARNFLAGS_STD) export 
syscall_sw.o_CFLAGS_ADD=-Werror $(CWARNFLAGS_STD) -export task.o_CFLAGS_RM=$(CWARNFLAGS_STD) -export task.o_CFLAGS_ADD=-Werror $(CWARNFLAGS_STD) +#export task.o_CFLAGS_RM=$(CWARNFLAGS_STD) +#export task.o_CFLAGS_ADD=-Werror $(CWARNFLAGS_STD) export task_policy.o_CFLAGS_RM=$(CWARNFLAGS_STD) export task_policy.o_CFLAGS_ADD=-Werror $(CWARNFLAGS_STD) export task_swap.o_CFLAGS_RM=$(CWARNFLAGS_STD) export task_swap.o_CFLAGS_ADD=-Werror $(CWARNFLAGS_STD) -export thread.o_CFLAGS_RM=$(CWARNFLAGS_STD) -export thread.o_CFLAGS_ADD=-Werror $(CWARNFLAGS_STD) +#export thread.o_CFLAGS_RM=$(CWARNFLAGS_STD) +#export thread.o_CFLAGS_ADD=-Werror $(CWARNFLAGS_STD) export thread_act.o_CFLAGS_RM=$(CWARNFLAGS_STD) export thread_act.o_CFLAGS_ADD=-Werror $(CWARNFLAGS_STD) -export thread_call.o_CFLAGS_RM=$(CWARNFLAGS_STD) -export thread_call.o_CFLAGS_ADD=-Werror $(CWARNFLAGS_STD) +#export thread_call.o_CFLAGS_RM=$(CWARNFLAGS_STD) +#export thread_call.o_CFLAGS_ADD=-Werror $(CWARNFLAGS_STD) export thread_policy.o_CFLAGS_RM=$(CWARNFLAGS_STD) export thread_policy.o_CFLAGS_ADD=-Werror $(CWARNFLAGS_STD) export thread_swap.o_CFLAGS_RM=$(CWARNFLAGS_STD) export thread_swap.o_CFLAGS_ADD=-Werror $(CWARNFLAGS_STD) export timer.o_CFLAGS_RM=$(CWARNFLAGS_STD) export timer.o_CFLAGS_ADD=-Werror $(CWARNFLAGS_STD) -export timer_call.o_CFLAGS_RM=$(CWARNFLAGS_STD) -export timer_call.o_CFLAGS_ADD=-Werror $(CWARNFLAGS_STD) -export wait_queue.o_CFLAGS_RM=$(CWARNFLAGS_STD) -export wait_queue.o_CFLAGS_ADD=-Werror $(CWARNFLAGS_STD) +#export timer_call.o_CFLAGS_RM=$(CWARNFLAGS_STD) +#export timer_call.o_CFLAGS_ADD=-Werror $(CWARNFLAGS_STD) +#export wait_queue.o_CFLAGS_RM=$(CWARNFLAGS_STD) +#export wait_queue.o_CFLAGS_ADD=-Werror $(CWARNFLAGS_STD) export zalloc.o_CFLAGS_RM=$(CWARNFLAGS_STD) export zalloc.o_CFLAGS_ADD=-Werror $(CWARNFLAGS_STD) -export clock_server.o_CFLAGS_RM=$(CWARNFLAGS_STD) -export clock_server.o_CFLAGS_ADD=-Werror $(CWARNFLAGS_STD) -export clock_priv_server.o_CFLAGS_RM=$(CWARNFLAGS_STD) -export 
clock_priv_server.o_CFLAGS_ADD=-Werror $(CWARNFLAGS_STD) +#export clock_server.o_CFLAGS_RM=$(CWARNFLAGS_STD) +#export clock_server.o_CFLAGS_ADD=-Werror $(CWARNFLAGS_STD) +#export clock_priv_server.o_CFLAGS_RM=$(CWARNFLAGS_STD) +#export clock_priv_server.o_CFLAGS_ADD=-Werror $(CWARNFLAGS_STD) export clock_reply_user.o_CFLAGS_RM=$(CWARNFLAGS_STD) export clock_reply_user.o_CFLAGS_ADD=-Werror $(CWARNFLAGS_STD) -export exc_user.o_CFLAGS_RM=$(CWARNFLAGS_STD) -export exc_user.o_CFLAGS_ADD=-Werror $(CWARNFLAGS_STD) -export exc_server.o_CFLAGS_RM=$(CWARNFLAGS_STD) -export exc_server.o_CFLAGS_ADD=-Werror $(CWARNFLAGS_STD) -export host_priv_server.o_CFLAGS_RM=$(CWARNFLAGS_STD) -export host_priv_server.o_CFLAGS_ADD=-Werror $(CWARNFLAGS_STD) -export host_security_server.o_CFLAGS_RM=$(CWARNFLAGS_STD) -export host_security_server.o_CFLAGS_ADD=-Werror $(CWARNFLAGS_STD) -export ledger_server.o_CFLAGS_RM=$(CWARNFLAGS_STD) -export ledger_server.o_CFLAGS_ADD=-Werror $(CWARNFLAGS_STD) -export mach_host_server.o_CFLAGS_RM=$(CWARNFLAGS_STD) -export mach_host_server.o_CFLAGS_ADD=-Werror $(CWARNFLAGS_STD) +#export exc_user.o_CFLAGS_RM=$(CWARNFLAGS_STD) +#export exc_user.o_CFLAGS_ADD=-Werror $(CWARNFLAGS_STD) +#export exc_server.o_CFLAGS_RM=$(CWARNFLAGS_STD) +#export exc_server.o_CFLAGS_ADD=-Werror $(CWARNFLAGS_STD) +#export host_priv_server.o_CFLAGS_RM=$(CWARNFLAGS_STD) +#export host_priv_server.o_CFLAGS_ADD=-Werror $(CWARNFLAGS_STD) +#export host_security_server.o_CFLAGS_RM=$(CWARNFLAGS_STD) +#export host_security_server.o_CFLAGS_ADD=-Werror $(CWARNFLAGS_STD) +#export ledger_server.o_CFLAGS_RM=$(CWARNFLAGS_STD) +#export ledger_server.o_CFLAGS_ADD=-Werror $(CWARNFLAGS_STD) +#export mach_host_server.o_CFLAGS_RM=$(CWARNFLAGS_STD) +#export mach_host_server.o_CFLAGS_ADD=-Werror $(CWARNFLAGS_STD) export mach_notify_user.o_CFLAGS_RM=$(CWARNFLAGS_STD) export mach_notify_user.o_CFLAGS_ADD=-Werror $(CWARNFLAGS_STD) -export processor_server.o_CFLAGS_RM=$(CWARNFLAGS_STD) -export 
processor_server.o_CFLAGS_ADD=-Werror $(CWARNFLAGS_STD) -export processor_set_server.o_CFLAGS_RM=$(CWARNFLAGS_STD) -export processor_set_server.o_CFLAGS_ADD=-Werror $(CWARNFLAGS_STD) +#export processor_server.o_CFLAGS_RM=$(CWARNFLAGS_STD) +#export processor_server.o_CFLAGS_ADD=-Werror $(CWARNFLAGS_STD) +#export processor_set_server.o_CFLAGS_RM=$(CWARNFLAGS_STD) +#export processor_set_server.o_CFLAGS_ADD=-Werror $(CWARNFLAGS_STD) export prof_user.o_CFLAGS_RM=$(CWARNFLAGS_STD) export prof_user.o_CFLAGS_ADD=-Werror $(CWARNFLAGS_STD) -export task_server.o_CFLAGS_RM=$(CWARNFLAGS_STD) -export task_server.o_CFLAGS_ADD=-Werror $(CWARNFLAGS_STD) -export thread_act_server.o_CFLAGS_RM=$(CWARNFLAGS_STD) -export thread_act_server.o_CFLAGS_ADD=-Werror $(CWARNFLAGS_STD) +#export task_server.o_CFLAGS_RM=$(CWARNFLAGS_STD) +#export task_server.o_CFLAGS_ADD=-Werror $(CWARNFLAGS_STD) +#export thread_act_server.o_CFLAGS_RM=$(CWARNFLAGS_STD) +#export thread_act_server.o_CFLAGS_ADD=-Werror $(CWARNFLAGS_STD) include $(MakeInc_cmd) include $(MakeInc_def) diff --git a/osfmk/conf/Makefile.i386 b/osfmk/conf/Makefile.i386 index e7e996b62..f6c752838 100644 --- a/osfmk/conf/Makefile.i386 +++ b/osfmk/conf/Makefile.i386 @@ -11,6 +11,8 @@ CWARNFLAGS= $(filter-out -Wbad-function-cast, $(CWARNFLAGS_STD)) # Objects that don't compile cleanly: OBJS_NO_WERROR= \ + pms.o \ + etimer.o \ ioconf.o \ UNDRequest.o \ KUNCUserNotifications.o \ @@ -28,7 +30,84 @@ OBJS_NO_WERROR= \ mk_sp.o \ printf.o \ syscall_emulation.o \ - mach_header.o + UNDReplyServer.o \ + ipc_kobject.o \ + ipc_tt.o \ + machine.o \ + processor.o \ + stack.o \ + sched_prim.o \ + sync_lock.o \ + task.o \ + syscall_subr.o \ + thread.o \ + thread_call.o \ + timer_call.o \ + hibernate.o \ + clock_server.o \ + clock_priv_server.o \ + exc_user.o \ + wait_queue.o \ + exc_server.o \ + host_priv_server.o \ + host_priv_server.o \ + ledger_server.o \ + host_security_server.o \ + lock_set_server.o \ + mach_host_server.o \ + mach_port_server.o \ + 
processor_server.o \ + processor_set_server.o \ + semaphore_server.o \ + task_server.o \ + thread_act_server.o \ + device_server.o \ + pmap.o \ + cpuid.o \ + bsd_i386.o \ + loose_ends.o \ + mp_desc.o \ + perfmon.o \ + pcb.o \ + rtclock.o \ + trap.o \ + bbclock.o \ + mp.o \ + acpi.o \ + serial_console.o \ + text_console.o \ + mtrr.o \ + hibernate_i386.o \ + mach_msg.o \ + mach_header.o \ + chud_osfmk_callback_i386.o \ + chud_thread.o \ + chud_thread_i386.o \ + chud_cpu.o \ + db_access.o \ + db_aout.o \ + db_break.o \ + db_command.o \ + db_cond.o \ + db_disasm.o \ + db_examine.o \ + db_expr.o \ + db_ext_symtab.o \ + db_input.o \ + db_interface.o \ + db_lex.o \ + db_macro.o \ + db_output.o \ + db_print.o \ + db_run.o \ + db_sym.o \ + db_task_thread.o \ + db_trace.o \ + db_trap.o \ + db_variables.o \ + db_watch.o \ + db_write_cmd.o \ + xpr.o OBJS_WERROR=$(filter-out $(OBJS_NO_WERROR),$(OBJS)) diff --git a/osfmk/conf/files b/osfmk/conf/files index 3a2b613e0..e9738b439 100644 --- a/osfmk/conf/files +++ b/osfmk/conf/files @@ -129,8 +129,10 @@ osfmk/ipc/mach_port.c standard osfmk/ipc/mig_log.c optional mig_debug osfmk/kern/ast.c standard osfmk/kern/clock.c standard +osfmk/kern/clock_oldops.c standard osfmk/kern/counters.c standard osfmk/kern/debug.c standard +osfmk/kern/etimer.c standard osfmk/kern/exception.c standard osfmk/kern/host.c standard osfmk/kern/host_notify.c standard @@ -147,6 +149,8 @@ osfmk/kern/mach_clock.c standard osfmk/kern/machine.c standard osfmk/kern/mk_sp.c standard osfmk/kern/mk_timer.c standard +osfmk/kern/pms.c standard +osfmk/kern/page_decrypt.c standard osfmk/kern/profile.c standard osfmk/kern/printf.c standard osfmk/kern/priority.c standard @@ -241,3 +245,14 @@ osfmk/kern/kmod.c standard # funcs). 
# osfmk/device/subrs.c standard + +# +# MI CHUD: +# +osfmk/chud/chud_cpu.c standard +osfmk/chud/chud_glue.c standard +osfmk/chud/chud_memory.c standard +osfmk/chud/chud_osfmk_callback.c standard +osfmk/chud/chud_thread.c standard + +osfmk/console/serial_general.c standard diff --git a/osfmk/conf/files.i386 b/osfmk/conf/files.i386 index 1121b7b74..55064fe46 100644 --- a/osfmk/conf/files.i386 +++ b/osfmk/conf/files.i386 @@ -16,10 +16,11 @@ OPTIONS/dynamic_num_nodes optional dynamic_num_nodes OPTIONS/vtoc_compat optional vtoc_compat OPTIONS/fddi optional fddi +osfmk/vm/vm_apple_protect.c standard + osfmk/i386/hi_res_clock_map.c optional hi_res_clock osfmk/i386/pmap.c standard -osfmk/i386/read_fault.c standard osfmk/ddb/db_aout.c optional mach_kdb @@ -53,6 +54,7 @@ osfmk/i386/loose_ends.c standard osfmk/i386/locks_i386.c standard osfmk/i386/locore.s standard osfmk/i386/start.s standard +osfmk/i386/lowmem_vectors.s standard osfmk/i386/cswitch.s standard osfmk/i386/machine_routines.c standard osfmk/i386/machine_routines_asm.s standard @@ -65,8 +67,13 @@ osfmk/i386/phys.c standard osfmk/i386/rtclock.c standard osfmk/i386/trap.c standard osfmk/i386/user_ldt.c standard +osfmk/i386/Diagnostics.c standard +osfmk/i386/pmCPU.c standard +osfmk/i386/hpet.c standard +osfmk/i386/tsc.c standard osfmk/i386/commpage/commpage.c standard +osfmk/i386/commpage/commpage_asm.s standard osfmk/i386/commpage/atomic.s standard osfmk/i386/commpage/commpage_mach_absolute_time.s standard osfmk/i386/commpage/spinlocks.s standard @@ -74,12 +81,20 @@ osfmk/i386/commpage/pthreads.s standard osfmk/i386/commpage/cacheflush.s standard osfmk/i386/commpage/commpage_gettimeofday.s standard osfmk/i386/commpage/bcopy_scalar.s standard +osfmk/i386/commpage/bcopy_sse3.s standard +osfmk/i386/commpage/bcopy_sse4.s standard +osfmk/i386/commpage/bcopy_sse4_64.s standard osfmk/i386/commpage/bzero_scalar.s standard +osfmk/i386/commpage/bzero_sse3.s standard +osfmk/i386/commpage/bzero_sse3_64.s standard 
+osfmk/i386/commpage/memset_pattern_sse3.s standard +osfmk/i386/commpage/memset_pattern_sse3_64.s standard +osfmk/i386/commpage/longcopy_sse4.s standard +osfmk/i386/commpage/longcopy_sse4_64.s standard +osfmk/i386/commpage/commpage_sigs.c standard osfmk/i386/AT386/autoconf.c standard -osfmk/i386/AT386/bbclock.c standard osfmk/i386/AT386/conf.c standard -osfmk/i386/AT386/himem.c optional himem osfmk/i386/AT386/model_dep.c standard osfmk/i386/AT386/physmem.c optional physmem device-driver @@ -114,6 +129,10 @@ osfmk/kdp/ml/i386/kdp_vm.c optional mach_kdp osfmk/i386/hibernate_i386.c standard osfmk/i386/hibernate_restore.s standard +osfmk/chud/i386/chud_osfmk_callback_i386.c standard +osfmk/chud/i386/chud_cpu_i386.c standard +osfmk/chud/i386/chud_thread_i386.c standard + # DUMMIES TO FORCE GENERATION OF .h FILES osfmk/OPTIONS/ln optional ln osfmk/OPTIONS/eisa optional eisa @@ -121,3 +140,7 @@ osfmk/OPTIONS/himem optional himem osfmk/OPTIONS/ec optional ec osfmk/OPTIONS/hi_res_clock optional hi_res_clock + +osfmk/i386/startup64.c optional x86_64 +osfmk/i386/start64.s optional x86_64 +osfmk/i386/idt64.s optional x86_64 diff --git a/osfmk/conf/files.ppc b/osfmk/conf/files.ppc index e03ce61b9..ffc13d1d8 100644 --- a/osfmk/conf/files.ppc +++ b/osfmk/conf/files.ppc @@ -64,7 +64,6 @@ osfmk/ppc/Diagnostics.c standard osfmk/ppc/PPCcalls.c standard osfmk/ppc/vmachmon.c standard osfmk/ppc/vmachmon_asm.s standard -osfmk/ppc/pms.c standard osfmk/ppc/pmsCPU.c standard osfmk/ppc/Firmware.s standard @@ -94,12 +93,10 @@ osfmk/ppc/commpage/memset_g3.s standard osfmk/ppc/commpage/memset_g4.s standard osfmk/ppc/commpage/memset_g5.s standard -osfmk/ppc/chud/chud_osfmk_callback.c standard -osfmk/ppc/chud/chud_cpu.c standard -osfmk/ppc/chud/chud_cpu_asm.s standard -osfmk/ppc/chud/chud_memory.c standard -osfmk/ppc/chud/chud_thread.c standard -osfmk/ppc/chud/chud_glue.c standard +osfmk/chud/ppc/chud_cpu_asm.s standard +osfmk/chud/ppc/chud_cpu_ppc.c standard 
+osfmk/chud/ppc/chud_osfmk_callback_ppc.c standard +osfmk/chud/ppc/chud_thread_ppc.c standard osfmk/kdp/ml/ppc/kdp_machdep.c optional mach_kdp osfmk/kdp/ml/ppc/kdp_vm.c optional mach_kdp diff --git a/osfmk/console/i386/serial_console.c b/osfmk/console/i386/serial_console.c index 6808589ae..a8bc82d29 100644 --- a/osfmk/console/i386/serial_console.c +++ b/osfmk/console/i386/serial_console.c @@ -27,6 +27,7 @@ #include #include #include +#include #include static struct { @@ -39,6 +40,9 @@ static struct { decl_simple_lock_data(,write_lock); } console_ring; +hw_lock_data_t cnputc_lock; +static volatile long console_output = 0; + typedef struct console_buf { char *buf_base; char *buf_end; @@ -47,6 +51,35 @@ typedef struct console_buf { char buf[CPU_BUFFER_LEN]; } console_buf_t; +extern int serial_getc(void); +extern void serial_putc(int); + +static void _serial_putc(int, int, int); + +int vcgetc(int, int, boolean_t, boolean_t); + +console_ops_t cons_ops[] = { + {_serial_putc, _serial_getc}, + {vcputc, vcgetc} +}; + +uint32_t nconsops = (sizeof cons_ops / sizeof cons_ops[0]); + +uint32_t cons_ops_index = VC_CONS_OPS; + +/* This macro polls for pending TLB flushes while spinning on a lock + */ +#define SIMPLE_LOCK_NO_INTRS(l) \ +MACRO_BEGIN \ + boolean_t istate = ml_get_interrupts_enabled(); \ + while (!simple_lock_try((l))) \ + { \ + if (!istate) \ + handle_pending_TLB_flushes(); \ + cpu_pause(); \ + } \ +MACRO_END + void console_init(void) { @@ -63,7 +96,7 @@ console_init(void) console_ring.write_ptr = console_ring.buffer; simple_lock_init(&console_ring.read_lock, 0); simple_lock_init(&console_ring.write_lock, 0); - + hw_lock_init(&cnputc_lock); } void * @@ -94,6 +127,12 @@ console_cpu_free(void *buf) kfree((void *) buf, sizeof(console_buf_t)); } +static inline int +console_ring_space(void) +{ + return console_ring.len - console_ring.used; +} + static boolean_t console_ring_put(char ch) { @@ -127,23 +166,83 @@ console_ring_get(void) static inline void 
cpu_buffer_put(console_buf_t *cbp, char ch) { - if (cbp->buf_ptr < cbp->buf_end) + if (ch != '\0' && cbp->buf_ptr < cbp->buf_end) *(cbp->buf_ptr++) = ch; } static inline void _cnputc(char c) { - vcputc(0, 0, c); + /* The console device output routines are assumed to be + * non-reentrant. + */ + mp_disable_preemption(); + if (!hw_lock_to(&cnputc_lock, LockTimeOut*10)) { + /* If we timed out on the lock, and we're in the debugger, + * break the lock. + */ + if (debug_mode) { + /* Since hw_lock_to takes a pre-emption count...*/ + mp_enable_preemption(); + hw_lock_init(&cnputc_lock); + hw_lock_lock(&cnputc_lock); + } + else + panic("Lock acquire timeout in _cnputc()"); + } + cons_ops[cons_ops_index].putc(0, 0, c); if (c == '\n') - vcputc(0, 0,'\r'); + cons_ops[cons_ops_index].putc(0, 0, '\r'); + hw_lock_unlock(&cnputc_lock); + mp_enable_preemption(); } void cnputcusr(char c) -{ - simple_lock(&console_ring.read_lock); +{ + /* Spin (with pre-emption enabled) waiting for console_ring_try_empty() + * to complete output. There is a small window here where we could + * end up with a stale value of console_output, but it's unlikely, + * and _cnputc(), which outputs to the console device, is internally + * synchronized. There's something of a conflict between the + * character-at-a-time (with pre-emption enabled) unbuffered + * output model here, and the buffered output from cnputc(), + * whose consumers include printf() ( which outputs a sequence + * with pre-emption disabled, and should be safe to call with + * interrupts off); we don't want to disable pre-emption indefinitely + * here, and spinlocks and mutexes are inappropriate. + */ + while (console_output != 0); + _cnputc(c); +} + +static void +console_ring_try_empty(void) +{ + boolean_t state = ml_get_interrupts_enabled(); + /* + * Try to get the read lock on the ring buffer to empty it. + * If this fails someone else is already emptying... 
+ */ + if (!simple_lock_try(&console_ring.read_lock)) + return; + /* Indicate that we're in the process of writing a block of data + * to the console. + */ + atomic_incl(&console_output, 1); + for (;;) { + char ch; + if (!state) + handle_pending_TLB_flushes(); + SIMPLE_LOCK_NO_INTRS(&console_ring.write_lock); + ch = console_ring_get(); + simple_unlock(&console_ring.write_lock); + if (ch == 0) + break; + _cnputc(ch); + } + atomic_decl(&console_output, 1); simple_unlock(&console_ring.read_lock); } @@ -151,16 +250,29 @@ void cnputc(char c) { console_buf_t *cbp; - - if (!(real_ncpus > 1)) { +#if MACH_KDB + /* Bypass locking/buffering if in debugger */ + if (kdb_cpu == cpu_number()) { _cnputc(c); return; } - +#endif /* MACH_KDB */ mp_disable_preemption(); - /* add to stack buf */ cbp = (console_buf_t *) current_cpu_datap()->cpu_console_buf; + if (cbp == NULL) { + mp_enable_preemption(); + /* Put directly if console ring is not initialized */ + _cnputc(c); + return; + } + + /* add to stack buf */ if (c != '\n') { + /* XXX - cpu_buffer_put() can fail silently if the buffer + * is exhausted, as can happen if there's a long sequence + * of data with no newlines. We should, instead, attempt + * a flush. + */ cpu_buffer_put(cbp, c); } else { boolean_t state; @@ -170,34 +282,70 @@ cnputc(char c) /* copy this buffer into the shared ring buffer */ state = ml_set_interrupts_enabled(FALSE); - simple_lock(&console_ring.write_lock); - for (cp = cbp->buf_base; cp < cbp->buf_ptr; cp++) { - while (!console_ring_put(*cp)) - /* spin if share buffer full */ - cpu_pause(); - } - (void) console_ring_put('\n'); - simple_unlock(&console_ring.write_lock); - ml_set_interrupts_enabled(state); - cbp->buf_ptr = cbp->buf_base; + SIMPLE_LOCK_NO_INTRS(&console_ring.write_lock); /* - * Try to get the read lock on the ring buffer to empty it. - * If this fails someone else is already emptying... + * Is there enough space in the shared ring buffer? + * Try to empty if not. 
+ * Note, we want the entire local buffer to fit to + * avoid another cpu interjecting. */ - if (simple_lock_try(&console_ring.read_lock)) { - for (;;) { - char ch; - - simple_lock(&console_ring.write_lock); - ch = console_ring_get(); - simple_unlock(&console_ring.write_lock); - if (ch == 0) - break; - _cnputc(ch); - } - simple_unlock(&console_ring.read_lock); + while (cbp->buf_ptr-cbp->buf_base + 1 > console_ring_space()) { + simple_unlock(&console_ring.write_lock); + console_ring_try_empty(); + SIMPLE_LOCK_NO_INTRS(&console_ring.write_lock); } + for (cp = cbp->buf_base; cp < cbp->buf_ptr; cp++) + console_ring_put(*cp); + console_ring_put('\n'); + cbp->buf_ptr = cbp->buf_base; + simple_unlock(&console_ring.write_lock); + ml_set_interrupts_enabled(state); } + console_ring_try_empty(); mp_enable_preemption(); } + +int _serial_getc(__unused int a, __unused int b, boolean_t wait, __unused boolean_t raw) +{ + int c; + do { + c = serial_getc(); + } while (wait && c < 0); + + return c; +} + +static void _serial_putc(__unused int a, __unused int b, int c) +{ + serial_putc(c); +} + + +int +cngetc(void) +{ + return cons_ops[cons_ops_index].getc(0, 0, + TRUE, FALSE); +} + +int +cnmaygetc(void) +{ + return cons_ops[cons_ops_index].getc(0, 0, + FALSE, FALSE); +} + +int +vcgetc(__unused int l, + __unused int u, + __unused boolean_t wait, + __unused boolean_t raw) +{ + char c; + + if( 0 == (*PE_poll_input)( 0, &c)) + return( c); + else + return( 0); +} diff --git a/osfmk/console/i386/text_console.c b/osfmk/console/i386/text_console.c index a5b7cad90..95a83b8f3 100644 --- a/osfmk/console/i386/text_console.c +++ b/osfmk/console/i386/text_console.c @@ -26,7 +26,7 @@ * VGA text console support. 
*/ -#include +#include #include #include "text_console.h" @@ -98,6 +98,7 @@ move_up( csrpos_t from, csrpos_t to, int count) { + if (vram_start == 0) return; kd_slmscu( vram_start + from, vram_start + to, count ); } @@ -111,6 +112,7 @@ move_down( csrpos_t from, csrpos_t to, int count ) { + if (vram_start == 0) return; kd_slmscd( vram_start + from, vram_start + to, count ); } @@ -124,6 +126,7 @@ clear_block( csrpos_t start, int size, char attr) { + if (vram_start == 0) return; kd_slmwd( vram_start + start, size, ((unsigned short) attr << 8) + SPACE_CHAR); } @@ -171,6 +174,7 @@ display_char( csrpos_t pos, /* where to put it */ char ch, /* the character */ char attr ) /* its attribute */ { + if (vram_start == 0) return; *(vram_start + pos) = ch; *(vram_start + pos + 1) = attr; } diff --git a/osfmk/console/i386/text_console.h b/osfmk/console/i386/text_console.h index da74b9964..f0921a20a 100644 --- a/osfmk/console/i386/text_console.h +++ b/osfmk/console/i386/text_console.h @@ -23,8 +23,6 @@ #ifndef _TEXT_CONSOLE_H_ #define _TEXT_CONSOLE_H_ -#define TEXT_MODE 0 - void tc_paint_char(int x, int y, unsigned char ch, int attrs, unsigned char ch_previous, int attrs_previous); void tc_scroll_down(int lines, int top, int bottom); void tc_scroll_up(int lines, int top, int bottom); diff --git a/osfmk/console/panic_dialog.c b/osfmk/console/panic_dialog.c index 1e7a26f93..6c56b8e9c 100644 --- a/osfmk/console/panic_dialog.c +++ b/osfmk/console/panic_dialog.c @@ -22,6 +22,7 @@ #include #include +#include #include #include #include @@ -517,9 +518,9 @@ blit_digit( int digit ) for( j=FONT_WIDTH-1; j>=0; j--) { if ( bits & 0x80 ) - rendered_font[row][j] = 0x0100 | panic_dialog->pd_info_color[0]; + rendered_font[row][j] = OSSwapBigToHostInt16(0x0100 | panic_dialog->pd_info_color[0]); else - rendered_font[row][j] = 0x0100 | panic_dialog->pd_info_color[1]; + rendered_font[row][j] = OSSwapBigToHostInt16(0x0100 | panic_dialog->pd_info_color[1]); bits <<= 1; } } @@ -878,16 +879,16 @@ 
findbestgray( unsigned int color24 ) static unsigned char color24togray8( unsigned int color24 ) { - float R, G, B; - float Gray; + int R, G, B; + int Gray; unsigned char gray8; R = (color24 & 0xFF0000) >> 16 ; G = (color24 & 0xFF00) >> 8 ; B = (color24 & 0xFF); - Gray = (R*.30) + (G*.59) + (B*.11); - gray8 = (unsigned char) ( Gray + .5); + Gray = (R*30) + (G*59) + (B*11); + gray8 = (unsigned char) ((Gray + 50) / 100); return gray8; } diff --git a/osfmk/console/ppc/serial_console.c b/osfmk/console/ppc/serial_console.c index 080ba0d8f..bb5e7f87a 100644 --- a/osfmk/console/ppc/serial_console.c +++ b/osfmk/console/ppc/serial_console.c @@ -34,6 +34,7 @@ #include /* spl definitions */ #include #include +#include #include #include #include @@ -56,29 +57,9 @@ */ const int console_unit = 0; -const int console_chan_default = CONSOLE_PORT; +const uint32_t console_chan_default = CONSOLE_PORT; #define console_chan (console_chan_default) /* ^ cpu_number()) */ -#define OPS(putc, getc, nosplputc, nosplgetc) putc, getc - -const struct console_ops { - int (*putc)(int, int, int); - int (*getc)(int, int, boolean_t, boolean_t); -} cons_ops[] = { -#define SCC_CONS_OPS 0 - {OPS(scc_putc, scc_getc, no_spl_scputc, no_spl_scgetc)}, -#define VC_CONS_OPS 1 - {OPS(vcputc, vcgetc, no_spl_vcputc, no_spl_vcgetc)}, -}; -#define NCONSOPS (sizeof cons_ops / sizeof cons_ops[0]) - -#if SERIAL_CONSOLE_DEFAULT -#define CONS_OPS SCC_CONS_OPS -#define CONS_NAME "com" -#else -#define CONS_OPS VC_CONS_OPS -#define CONS_NAME "vc" -#endif #define MP_SAFE_CONSOLE 1 /* Set this to 1 to allow more than 1 processor to print at once */ #if MP_SAFE_CONSOLE @@ -97,8 +78,17 @@ volatile unsigned int sconowner=-1; /* Mark who's actually writing */ #endif +#define OPS(putc, getc, nosplputc, nosplgetc) putc, getc + +console_ops_t cons_ops[] = { + {OPS(scc_putc, scc_getc, no_spl_scputc, no_spl_scgetc)}, + {OPS(vcputc, vcgetc, no_spl_vcputc, no_spl_vcgetc)}, +}; + +uint32_t nconsops = (sizeof cons_ops / sizeof 
cons_ops[0]); + +uint32_t cons_ops_index = VC_CONS_OPS; -unsigned int cons_ops_index = CONS_OPS; unsigned int killprint = 0; unsigned int debcnputc = 0; extern unsigned int mappingdeb0; @@ -313,43 +303,6 @@ cnmaygetc() FALSE, FALSE); } -boolean_t console_is_serial() -{ - return cons_ops_index == SCC_CONS_OPS; -} - -int -switch_to_video_console() -{ - int old_cons_ops = cons_ops_index; - cons_ops_index = VC_CONS_OPS; - return old_cons_ops; -} - -int -switch_to_serial_console() -{ - int old_cons_ops = cons_ops_index; - cons_ops_index = SCC_CONS_OPS; - return old_cons_ops; -} - -/* The switch_to_{video,serial,kgdb}_console functions return a cookie that - can be used to restore the console to whatever it was before, in the - same way that splwhatever() and splx() work. */ -void -switch_to_old_console(int old_console) -{ - static boolean_t squawked; - unsigned int ops = old_console; - - if (ops >= NCONSOPS && !squawked) { - squawked = TRUE; - printf("switch_to_old_console: unknown ops %d\n", ops); - } else - cons_ops_index = ops; -} - int vcgetc(__unused int l, diff --git a/osfmk/console/serial_general.c b/osfmk/console/serial_general.c new file mode 100644 index 000000000..16ce74940 --- /dev/null +++ b/osfmk/console/serial_general.c @@ -0,0 +1,127 @@ +/* + * Copyright (c) 2005 Apple Computer, Inc. All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * The contents of this file constitute Original Code as defined in and + * are subject to the Apple Public Source License Version 1.1 (the + * "License"). You may not use this file except in compliance with the + * License. Please obtain a copy of the License at + * http://www.apple.com/publicsource and read it before using this file. 
+ * + * This Original Code and all software distributed under the License are + * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the + * License for the specific language governing rights and limitations + * under the License. + * + * @APPLE_LICENSE_HEADER_END@ + */ +/* + * @OSF_COPYRIGHT@ + */ +/* + * @APPLE_FREE_COPYRIGHT@ + */ + +#include +#include +#include +#include +#include +#include +#include + +extern unsigned int disableSerialOuput; +extern void cons_cinput(char ch); /* The BSD routine that gets characters */ + +unsigned int serialmode; /* Serial mode keyboard and console control */ + +/* + * This routine will start a thread that polls the serial port, listening for + * characters that have been typed. + */ + +void +serial_keyboard_init(void) +{ + kern_return_t result; + thread_t thread; + + if(!(serialmode & 2)) return; /* Leave if we do not want a serial console */ + + kprintf("Serial keyboard started\n"); + result = kernel_thread_start_priority((thread_continue_t)serial_keyboard_start, NULL, MAXPRI_KERNEL, &thread); + if (result != KERN_SUCCESS) + panic("serial_keyboard_init"); + + thread_deallocate(thread); +} + +void +serial_keyboard_start(void) +{ + + serial_keyboard_poll(); /* Go see if there are any characters pending now */ + panic("serial_keyboard_start: we can't get back here\n"); + +} + +void +serial_keyboard_poll(void) +{ + int chr; + uint64_t next; + + + while(1) { /* Do this for a while */ + chr = _serial_getc(0, 1, 0, 1); /* Get a character if there is one */ + if(chr < 0) break; /* The serial buffer is empty */ + cons_cinput((char)chr); /* Buffer up the character */ + } + + clock_interval_to_deadline(16, 1000000, &next); /* Get time of pop */ + + assert_wait_deadline((event_t)serial_keyboard_poll, 
THREAD_UNINT, next); /* Show we are "waiting" */ + thread_block((thread_continue_t)serial_keyboard_poll); /* Wait for it */ + panic("serial_keyboard_poll: Shouldn't never ever get here...\n"); +} + +boolean_t console_is_serial() +{ + return cons_ops_index == SERIAL_CONS_OPS; +} + +int +switch_to_video_console() +{ + int old_cons_ops = cons_ops_index; + cons_ops_index = VC_CONS_OPS; + return old_cons_ops; +} + +int +switch_to_serial_console() +{ + int old_cons_ops = cons_ops_index; + cons_ops_index = SERIAL_CONS_OPS; + return old_cons_ops; +} + +/* The switch_to_{video,serial,kgdb}_console functions return a cookie that + can be used to restore the console to whatever it was before, in the + same way that splwhatever() and splx() work. */ +void +switch_to_old_console(int old_console) +{ + static boolean_t squawked; + uint32_t ops = old_console; + + if ((ops >= nconsops) && !squawked) { + squawked = TRUE; + printf("switch_to_old_console: unknown ops %d\n", ops); + } else + cons_ops_index = ops; +} diff --git a/osfmk/console/serial_protos.h b/osfmk/console/serial_protos.h new file mode 100644 index 000000000..4d6279d70 --- /dev/null +++ b/osfmk/console/serial_protos.h @@ -0,0 +1,54 @@ +/* + * Copyright (c) 2005 Apple Computer, Inc. All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * The contents of this file constitute Original Code as defined in and + * are subject to the Apple Public Source License Version 1.1 (the + * "License"). You may not use this file except in compliance with the + * License. Please obtain a copy of the License at + * http://www.apple.com/publicsource and read it before using this file. 
+ * + * This Original Code and all software distributed under the License are + * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the + * License for the specific language governing rights and limitations + * under the License. + * + * @APPLE_LICENSE_HEADER_END@ + */ +/* + * @OSF_COPYRIGHT@ + */ +/* + * @APPLE_FREE_COPYRIGHT@ + */ + + + +void serial_keyboard_init(void); +void serial_keyboard_start(void); +void serial_keyboard_poll(void); + +extern uint32_t serialmode; +extern uint32_t cons_ops_index; +extern uint32_t nconsops; + +extern int _serial_getc(int unit, int line, boolean_t wait, boolean_t raw); + +extern boolean_t console_is_serial(void); +extern int switch_to_serial_console(void); +extern int switch_to_video_console(void); +extern void switch_to_old_console(int old_console); + +struct console_ops { + int (*putc)(int, int, int); + int (*getc)(int, int, boolean_t, boolean_t); +} console_ops; +typedef struct console_ops console_ops_t; + + +#define SERIAL_CONS_OPS 0 +#define VC_CONS_OPS 1 diff --git a/osfmk/console/video_console.c b/osfmk/console/video_console.c index 9e00806ff..133097adb 100644 --- a/osfmk/console/video_console.c +++ b/osfmk/console/video_console.c @@ -95,6 +95,7 @@ #include #include +#include #include @@ -252,6 +253,8 @@ gc_clear_screen(int xx, int yy, int top, int bottom, int which) { spl_t s; + if (!gc_buffer_size) return; + s = splhigh(); simple_lock(&gc_buffer_lock); @@ -986,6 +989,8 @@ gc_scroll_down(int num, int top, int bottom) { spl_t s; + if (!gc_buffer_size) return; + s = splhigh(); simple_lock(&gc_buffer_lock); @@ -1066,6 +1071,8 @@ gc_scroll_up(int num, int top, int bottom) { spl_t s; + if (!gc_buffer_size) return; + s = splhigh(); simple_lock(&gc_buffer_lock); @@ -2106,14 +2113,16 @@ static void 
vc_progress_task( void * arg0, void * arg ) #ifdef __i386__ #include +#include #endif /* __i386__ */ static boolean_t gc_acquired = FALSE; static boolean_t gc_graphics_boot = FALSE; -static unsigned int lastVideoPhys = 0; -static unsigned int lastVideoVirt = 0; -static unsigned int lastVideoSize = 0; +static unsigned int lastVideoPhys = 0; +static unsigned int lastVideoVirt = 0; +static unsigned int lastVideoSize = 0; +static boolean_t lastVideoMapped = FALSE; void initialize_screen(Boot_Video * boot_vinfo, unsigned int op) @@ -2129,16 +2138,34 @@ initialize_screen(Boot_Video * boot_vinfo, unsigned int op) /* * First, check if we are changing the size and/or location of the framebuffer */ - vinfo.v_name[0] = 0; vinfo.v_width = boot_vinfo->v_width; vinfo.v_height = boot_vinfo->v_height; vinfo.v_depth = boot_vinfo->v_depth; vinfo.v_rowbytes = boot_vinfo->v_rowBytes; vinfo.v_physaddr = boot_vinfo->v_baseAddr; /* Get the physical address */ +#ifdef __i386__ + vinfo.v_type = boot_vinfo->v_display; +#else + vinfo.v_type = 0; +#endif + - kprintf("initialize_screen: b=%08X, w=%08X, h=%08X, r=%08X\n", /* (BRINGUP) */ - vinfo.v_physaddr, vinfo.v_width, vinfo.v_height, vinfo.v_rowbytes); /* (BRINGUP) */ + kprintf("initialize_screen: b=%08X, w=%08X, h=%08X, r=%08X, d=%08X\n", /* (BRINGUP) */ + vinfo.v_physaddr, vinfo.v_width, vinfo.v_height, vinfo.v_rowbytes, vinfo.v_type); /* (BRINGUP) */ + +#ifdef __i386__ + if ( (vinfo.v_type == VGA_TEXT_MODE) ) + { + if (vinfo.v_physaddr == 0) { + vinfo.v_physaddr = 0xb8000; + vinfo.v_width = 80; + vinfo.v_height = 25; + vinfo.v_depth = 8; + vinfo.v_rowbytes = 0x8000; + } + } +#endif /* __i386__ */ if (!vinfo.v_physaddr) /* Check to see if we have a framebuffer */ { @@ -2165,39 +2192,43 @@ initialize_screen(Boot_Video * boot_vinfo, unsigned int op) vinfo.v_physaddr = (fbppage << 12) | (boot_vinfo->v_baseAddr & PAGE_MASK); /* Get the physical address */ } -#ifdef __i386__ - vinfo.v_type = boot_vinfo->v_display; -#else - vinfo.v_type = 0; 
-#endif - fbsize = round_page_32(vinfo.v_height * vinfo.v_rowbytes); /* Remember size */ if ((lastVideoPhys != vinfo.v_physaddr) || (fbsize > lastVideoSize)) /* Did framebuffer change location or get bigger? */ { - newVideoVirt = io_map_spec((vm_offset_t)vinfo.v_physaddr, fbsize); /* Allocate address space for framebuffer */ + unsigned int +#if FALSE + flags = (vinfo.v_type == VGA_TEXT_MODE) ? VM_WIMG_IO : VM_WIMG_WCOMB; +#else + flags = VM_WIMG_IO; +#endif + newVideoVirt = io_map_spec((vm_offset_t)vinfo.v_physaddr, fbsize, flags); /* Allocate address space for framebuffer */ if (lastVideoVirt) /* Was the framebuffer mapped before? */ { - pmap_remove(kernel_pmap, trunc_page_64(lastVideoVirt), - round_page_64(lastVideoVirt + lastVideoSize)); /* Toss mappings */ - - if(lastVideoVirt <= vm_last_addr) /* Was this not a special pre-VM mapping? */ +#if FALSE + if(lastVideoMapped) /* Was this not a special pre-VM mapping? */ +#endif + { + pmap_remove(kernel_pmap, trunc_page_64(lastVideoVirt), + round_page_64(lastVideoVirt + lastVideoSize)); /* Toss mappings */ + } + if(lastVideoMapped) /* Was this not a special pre-VM mapping? */ { kmem_free(kernel_map, lastVideoVirt, lastVideoSize); /* Toss kernel addresses */ } } - lastVideoPhys = vinfo.v_physaddr; /* Remember the framebuffer address */ lastVideoSize = fbsize; /* Remember the size */ lastVideoVirt = newVideoVirt; /* Remember the virtual framebuffer address */ + lastVideoMapped = (NULL != kernel_map); } } vinfo.v_baseaddr = lastVideoVirt; /* Set the new framebuffer address */ #ifdef __i386__ - if ( (vinfo.v_type == TEXT_MODE) ) + if ( (vinfo.v_type == VGA_TEXT_MODE) ) { // Text mode setup by the booter. diff --git a/osfmk/ddb/db_command.c b/osfmk/ddb/db_command.c index 77832b4b5..03fe35f3b 100644 --- a/osfmk/ddb/db_command.c +++ b/osfmk/ddb/db_command.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved. 
* * @APPLE_LICENSE_HEADER_START@ * @@ -159,8 +159,6 @@ void db_command_list( char *last_modifp, /* IN_OUT */ struct db_command *cmd_table); - - /* * Search for command prefix. */ @@ -504,20 +502,33 @@ struct db_command db_command_table[] = { { "dmacro", (db_func) db_del_macro_cmd, CS_OWN, 0 }, { "show", 0, 0, db_show_cmds }, { "cpu", (db_func) db_switch_cpu, 0, 0 }, + { "dr", db_display_real, CS_MORE|CS_SET_DOT, 0 }, + { "di", db_display_iokit, CS_MORE, 0 }, + { "dk", db_display_kmod, CS_MORE, 0 }, + { "reboot", (db_func) db_reboot, 0, 0 }, -#if defined(__ppc__) +#if !defined(__ppc__) + { "pm", db_pmgr, CS_MORE, 0 }, + { "na", db_nap, CS_MORE, 0 }, + { "ms", db_msr, CS_MORE, 0 }, + { "cp", db_cpuid, CS_MORE, 0 }, + { "da", db_apic, CS_MORE, 0 }, + { "ts", db_test, CS_MORE, 0 }, + { "dn", db_intcnt, CS_MORE, 0 }, + { "hp", db_hpet, CS_MORE, 0 }, + { "cf", db_cfg, CS_MORE, 0 }, + { "dt", db_dtimers, CS_MORE, 0 }, +#endif +#if defined(__ppc__) { "lt", db_low_trace, CS_MORE|CS_SET_DOT, 0 }, { "dl", db_display_long, CS_MORE|CS_SET_DOT, 0 }, { "dc", db_display_char, CS_MORE|CS_SET_DOT, 0 }, - { "dr", db_display_real, CS_MORE|CS_SET_DOT, 0 }, { "dv", db_display_virtual, CS_MORE|CS_SET_DOT, 0 }, { "dm", db_display_mappings, CS_MORE|CS_SET_DOT, 0 }, { "dh", db_display_hash, CS_MORE|CS_SET_DOT, 0 }, { "dp", db_display_pmap, CS_MORE, 0 }, - { "di", db_display_iokit, CS_MORE, 0 }, { "ds", db_display_save, CS_MORE|CS_SET_DOT, 0 }, { "dx", db_display_xregs, CS_MORE|CS_SET_DOT, 0 }, - { "dk", db_display_kmod, CS_MORE, 0 }, { "gs", db_gsnoop, CS_MORE, 0 }, { "cm", db_check_mappings, CS_MORE, 0 }, { "cp", db_check_pmaps, CS_MORE, 0 }, diff --git a/osfmk/ddb/db_run.c b/osfmk/ddb/db_run.c index d0898f80c..4a00102db 100644 --- a/osfmk/ddb/db_run.c +++ b/osfmk/ddb/db_run.c @@ -71,6 +71,8 @@ #include #include +#include + boolean_t db_sstep_print; int db_loop_count; int db_call_depth; @@ -504,6 +506,19 @@ db_continue_cmd( db_cmd_loop_done = 1; } + +/* + * Switch to gdb + */ +void 
+db_to_gdb( + void) +{ + extern unsigned int switch_debugger; + + switch_debugger=1; +} + /* gdb */ void db_continue_gdb( @@ -512,9 +527,7 @@ db_continue_gdb( db_expr_t count, char * modif) { -#if defined(__ppc__) db_to_gdb(); -#endif db_run_mode = STEP_CONTINUE; db_inst_count = 0; db_last_inst_count = 0; @@ -525,9 +538,9 @@ db_continue_gdb( } - boolean_t db_in_single_step(void) { return(db_run_mode != STEP_NONE && db_run_mode != STEP_CONTINUE); } + diff --git a/osfmk/ddb/db_task_thread.c b/osfmk/ddb/db_task_thread.c index 23626b8a5..09229702e 100644 --- a/osfmk/ddb/db_task_thread.c +++ b/osfmk/ddb/db_task_thread.c @@ -69,8 +69,8 @@ #define DB_MAX_THREADID 0x10000 /* max # of threads in a task */ #define DB_MAX_PSETS 0x10000 /* max # of processor sets */ -task_t db_default_task; /* default target task */ -thread_t db_default_act; /* default target thr_act */ +task_t db_default_task = TASK_NULL; /* default target task */ +thread_t db_default_act = THREAD_NULL; /* default target thr_act */ diff --git a/osfmk/ddb/db_trap.c b/osfmk/ddb/db_trap.c index 59a007cdf..d95ff4536 100644 --- a/osfmk/ddb/db_trap.c +++ b/osfmk/ddb/db_trap.c @@ -64,7 +64,7 @@ #include #include #include -#include /* For db_printf() */ +#include /* For db_printf() */ #include #include #include @@ -99,8 +99,16 @@ db_task_trap( * but print symbols using a (task-specific) symbol table, found * using task. */ + + /* Elided since walking the thread/task lists before setting up + * safe recovery points is incorrect, and could + * potentially cause us to loop and fault indefinitely. 
+ */ +#if 0 db_init_default_act(); +#endif db_check_breakpoint_valid(); + if (db_stop_at_pc(&bkpt, task, task_space)) { if (db_inst_count) { db_printf("After %d instructions (%d loads, %d stores),\n", diff --git a/osfmk/default_pager/default_pager.c b/osfmk/default_pager/default_pager.c index b2409b19b..9373974d6 100644 --- a/osfmk/default_pager/default_pager.c +++ b/osfmk/default_pager/default_pager.c @@ -308,6 +308,8 @@ start_def_pager( __unused char *bs_device ) if (!default_pager_backing_store_monitor_callout) panic("can't start backing store monitor thread"); thread_call_enter(default_pager_backing_store_monitor_callout); + + return (0); } /* diff --git a/osfmk/default_pager/default_pager_internal.h b/osfmk/default_pager/default_pager_internal.h index 631c5b681..b01a94cae 100644 --- a/osfmk/default_pager/default_pager_internal.h +++ b/osfmk/default_pager/default_pager_internal.h @@ -537,7 +537,7 @@ struct clmap { (clm)->cl_alloc.clb_map >>= (VSCLSIZE(vs) - (clm)->cl_numpages) typedef struct vstruct_alias { - int *name; + memory_object_pager_ops_t name; struct vstruct *vs; } vstruct_alias_t; @@ -574,9 +574,11 @@ typedef struct vstruct_alias { * VM Object Structure: This is the structure used to manage * default pager object associations with their control counter- * parts (VM objects). + * + * The start of this structure MUST match a "struct memory_object". 
*/ typedef struct vstruct { - int *vs_mem_obj; /* our memory obj - temp */ + memory_object_pager_ops_t vs_pager_ops; /* == &default_pager_ops */ int vs_mem_obj_ikot;/* JMM:fake ip_kotype() */ memory_object_control_t vs_control; /* our mem obj control ref */ VS_LOCK_TYPE vs_lock; /* data for the lock */ @@ -727,9 +729,11 @@ __private_extern__ zone_t vstruct_zone; */ #ifdef MACH_KERNEL -#define ISVS ((int *)123456) +extern const struct memory_object_pager_ops default_pager_ops; + #define mem_obj_is_vs(_mem_obj_) \ - (((_mem_obj_) != NULL) && ((_mem_obj_)->pager == ISVS)) + (((_mem_obj_) != NULL) && \ + ((_mem_obj_)->mo_pager_ops == &default_pager_ops)) #define mem_obj_to_vs(_mem_obj_) \ ((vstruct_t)(_mem_obj_)) #define vs_to_mem_obj(_vs_) ((memory_object_t)(_vs_)) diff --git a/osfmk/default_pager/dp_backing_store.c b/osfmk/default_pager/dp_backing_store.c index f0ea16a7b..60136048e 100644 --- a/osfmk/default_pager/dp_backing_store.c +++ b/osfmk/default_pager/dp_backing_store.c @@ -563,7 +563,7 @@ default_pager_backing_store_create( kalloc(sizeof (struct vstruct_alias)); if(alias_struct != NULL) { alias_struct->vs = (struct vstruct *)bs; - alias_struct->name = ISVS; + alias_struct->name = &default_pager_ops; port->alias = (int) alias_struct; } else { @@ -714,7 +714,7 @@ ps_delete( error = KERN_FAILURE; else { vm_object_t transfer_object; - int count; + unsigned int count; upl_t upl; transfer_object = vm_object_allocate((vm_object_size_t)VM_SUPER_CLUSTER); @@ -1121,7 +1121,7 @@ vs_alloc_async(void) kalloc(sizeof (struct vstruct_alias)); if(alias_struct != NULL) { alias_struct->vs = (struct vstruct *)vsa; - alias_struct->name = ISVS; + alias_struct->name = &default_pager_ops; reply_port->alias = (int) alias_struct; vsa->reply_port = reply_port; vs_alloc_async_count++; @@ -1173,7 +1173,7 @@ vs_alloc_async(void) kalloc(sizeof (struct vstruct_alias)); if(alias_struct != NULL) { alias_struct->vs = reply_port; - alias_struct->name = ISVS; + alias_struct->name = 
&default_pager_ops; reply_port->alias = (int) vsa; vsa->reply_port = reply_port; vs_alloc_async_count++; @@ -1229,7 +1229,7 @@ ps_vstruct_create( /* * The following fields will be provided later. */ - vs->vs_mem_obj = NULL; + vs->vs_pager_ops = NULL; vs->vs_control = MEMORY_OBJECT_CONTROL_NULL; vs->vs_references = 1; vs->vs_seqno = 0; @@ -2469,6 +2469,7 @@ ps_read_device( __unused int flags) { panic("ps_read_device not supported"); + return KERN_FAILURE; } kern_return_t @@ -2480,6 +2481,7 @@ ps_write_device( __unused struct vs_async *vsa) { panic("ps_write_device not supported"); + return KERN_FAILURE; } #endif /* DEVICE_PAGING */ @@ -2516,7 +2518,7 @@ pvs_cluster_read( upl_t upl; kern_return_t error = KERN_SUCCESS; int size; - int residual; + unsigned int residual; unsigned int request_flags; int seg_index; int pages_in_cl; @@ -2564,8 +2566,8 @@ pvs_cluster_read( } while (cnt && (error == KERN_SUCCESS)) { - int ps_info_valid; - int page_list_count; + int ps_info_valid; + unsigned int page_list_count; if((vs_offset & cl_mask) && (cnt > (VM_SUPER_CLUSTER - @@ -2783,7 +2785,7 @@ pvs_cluster_read( failed_size = xfer_size; if (error == KERN_SUCCESS) { - if (residual == xfer_size) { + if ((signed) residual == xfer_size) { /* * If a read operation returns no error * and no data moved, we turn it into @@ -2872,7 +2874,7 @@ vs_cluster_write( cl_size = pages_in_cl * vm_page_size; if (!dp_internal) { - int page_list_count; + unsigned int page_list_count; int request_flags; unsigned int super_size; int first_dirty; @@ -3425,7 +3427,7 @@ vs_cluster_transfer( kern_return_t error = KERN_SUCCESS; unsigned int size, size_wanted; int i; - unsigned int residual; + unsigned int residual = 0; unsigned int unavail_size; // default_pager_thread_t *dpt; // boolean_t dealloc; @@ -3536,6 +3538,10 @@ vs_cluster_transfer( original_read_vsmap = *vsmap_ptr; if(ps->ps_segtype == PS_PARTITION) { + panic("swap partition not supported\n"); + /*NOTREACHED*/ + error = KERN_FAILURE; + residual = 
size; /* NEED TO ISSUE WITH SYNC & NO COMMIT error = ps_read_device(ps, actual_offset, &buffer, diff --git a/osfmk/default_pager/dp_memory_object.c b/osfmk/default_pager/dp_memory_object.c index c8ce4820e..77ebe0662 100644 --- a/osfmk/default_pager/dp_memory_object.c +++ b/osfmk/default_pager/dp_memory_object.c @@ -351,6 +351,20 @@ default_pager_add( #endif +const struct memory_object_pager_ops default_pager_ops = { + dp_memory_object_reference, + dp_memory_object_deallocate, + dp_memory_object_init, + dp_memory_object_terminate, + dp_memory_object_data_request, + dp_memory_object_data_return, + dp_memory_object_data_initialize, + dp_memory_object_data_unlock, + dp_memory_object_synchronize, + dp_memory_object_unmap, + "default pager" +}; + kern_return_t dp_memory_object_init( memory_object_t mem_obj, @@ -709,7 +723,7 @@ dp_memory_object_data_return( /* a synchronous interface */ /* return KERN_LOCK_OWNED; */ upl_t upl; - int page_list_count = 0; + unsigned int page_list_count = 0; memory_object_super_upl_request(vs->vs_control, (memory_object_offset_t)offset, size, size, @@ -724,8 +738,8 @@ dp_memory_object_data_return( if ((vs->vs_seqno != vs->vs_next_seqno++) || (vs->vs_readers) || (vs->vs_xfer_pending)) { - upl_t upl; - int page_list_count = 0; + upl_t upl; + unsigned int page_list_count = 0; vs->vs_next_seqno--; VS_UNLOCK(vs); @@ -809,7 +823,7 @@ default_pager_memory_object_create( * and this default_pager structure */ - vs->vs_mem_obj = ISVS; + vs->vs_pager_ops = &default_pager_ops; vs->vs_mem_obj_ikot = IKOT_MEMORY_OBJECT; /* @@ -844,7 +858,7 @@ default_pager_object_create( * Set up associations between the default pager * and this vstruct structure */ - vs->vs_mem_obj = ISVS; + vs->vs_pager_ops = &default_pager_ops; vstruct_list_insert(vs); *mem_objp = vs_to_mem_obj(vs); return KERN_SUCCESS; diff --git a/osfmk/device/device.defs b/osfmk/device/device.defs index 3830eaaff..54e4f0f70 100644 --- a/osfmk/device/device.defs +++ b/osfmk/device/device.defs @@ 
-82,6 +82,7 @@ type io_struct_inband_t = array[*:4096] of char; type io_scalar_inband_t = array[*:16] of int; type io_async_ref_t = array[*:8] of natural_t; type io_buf_ptr_t = ^array[] of MACH_MSG_TYPE_INTEGER_8; +type NDR_record_t = struct[8] of char; type io_object_t = mach_port_t ctype: mach_port_t @@ -491,6 +492,16 @@ routine io_object_get_bundle_identifier( out class_name : io_name_t ); +routine io_service_open_extended( + service : io_object_t; + in owningTask : task_t; + in connect_type : int; + in ndr : NDR_record_t; + in properties : io_buf_ptr_t, physicalcopy; + out result : natural_t; + out connection : io_connect_t + ); + #endif diff --git a/osfmk/device/device_types.defs b/osfmk/device/device_types.defs index a42ac7c77..b499e8d8a 100644 --- a/osfmk/device/device_types.defs +++ b/osfmk/device/device_types.defs @@ -91,7 +91,7 @@ type io_done_queue_t = mach_port_t #if KERNEL_SERVER intran: io_done_queue_t io_done_queue_port_lookup(mach_port_t) outtran: mach_port_t convert_io_done_queue_to_port(io_done_queue_t) -#endif KERNEL_SERVER +#endif /* KERNEL_SERVER */ ; import ; diff --git a/osfmk/device/iokit_rpc.c b/osfmk/device/iokit_rpc.c index c1e4c2d51..20622aa84 100644 --- a/osfmk/device/iokit_rpc.c +++ b/osfmk/device/iokit_rpc.c @@ -88,6 +88,9 @@ iokit_client_memory_for_type( vm_address_t * address, vm_size_t * size ); + +extern ppnum_t IOGetLastPageNumber(void); + /* * Lookup a device by its port. * Doesn't consume the naked send right; produces a device reference. 
@@ -394,36 +397,13 @@ iokit_notify( mach_msg_header_t * msg ) /* need to create a pmap function to generalize */ unsigned int IODefaultCacheBits(addr64_t pa) { - unsigned int flags; -#ifndef i386 - struct phys_entry * pp; - - // Find physical address - if ((pp = pmap_find_physentry(pa >> 12))) { - // Use physical attributes as default - // NOTE: DEVICE_PAGER_FLAGS are made to line up - flags = VM_MEM_COHERENT; /* We only support coherent memory */ - if(pp->ppLink & ppG) flags |= VM_MEM_GUARDED; /* Add in guarded if it is */ - if(pp->ppLink & ppI) flags |= VM_MEM_NOT_CACHEABLE; /* Add in cache inhibited if so */ - } else - // If no physical, just hard code attributes - flags = VM_WIMG_IO; -#else - extern pmap_paddr_t avail_end; - if (pa < avail_end) - flags = VM_WIMG_COPYBACK; - else - flags = VM_WIMG_IO; -#endif - - return flags; + return(pmap_cache_attributes(pa >> PAGE_SHIFT)); } -kern_return_t IOMapPages(vm_map_t map, vm_offset_t va, vm_offset_t pa, - vm_size_t length, unsigned int options) +kern_return_t IOMapPages(vm_map_t map, mach_vm_address_t va, mach_vm_address_t pa, + mach_vm_size_t length, unsigned int options) { - vm_size_t off; vm_prot_t prot; unsigned int flags; pmap_t pmap = map->pmap; @@ -446,7 +426,7 @@ kern_return_t IOMapPages(vm_map_t map, vm_offset_t va, vm_offset_t pa, flags = VM_WIMG_WTHRU; break; - case kIOWriteCombineCache: + case kIOMapWriteCombineCache: flags = VM_WIMG_WCOMB; break; @@ -454,33 +434,100 @@ kern_return_t IOMapPages(vm_map_t map, vm_offset_t va, vm_offset_t pa, flags = VM_WIMG_COPYBACK; break; } -#if __ppc__ // Set up a block mapped area - pmap_map_block(pmap, (addr64_t)va, (ppnum_t)(pa >> 12), (uint32_t)(length >> 12), prot, flags, 0); + pmap_map_block(pmap, va, (ppnum_t)atop_64(pa), (uint32_t) atop_64(round_page_64(length)), prot, flags, 0); + + return( KERN_SUCCESS ); +} + +kern_return_t IOUnmapPages(vm_map_t map, mach_vm_address_t va, mach_vm_size_t length) +{ + pmap_t pmap = map->pmap; + + pmap_remove(pmap, 
trunc_page_64(va), round_page_64(va + length)); + + return( KERN_SUCCESS ); +} + +kern_return_t IOProtectCacheMode(vm_map_t map, mach_vm_address_t va, + mach_vm_size_t length, unsigned int options) +{ + mach_vm_size_t off; + vm_prot_t prot; + unsigned int flags; + pmap_t pmap = map->pmap; + + prot = (options & kIOMapReadOnly) + ? VM_PROT_READ : (VM_PROT_READ|VM_PROT_WRITE); + + switch (options & kIOMapCacheMask) + { + /* What cache mode do we need? */ + case kIOMapDefaultCache: + default: + return (KERN_INVALID_ARGUMENT); + + case kIOMapInhibitCache: + flags = VM_WIMG_IO; + break; + + case kIOMapWriteThruCache: + flags = VM_WIMG_WTHRU; + break; + case kIOMapWriteCombineCache: + flags = VM_WIMG_WCOMB; + break; + + case kIOMapCopybackCache: + flags = VM_WIMG_COPYBACK; + break; + } +#if __ppc__ + // can't remap block mappings, but ppc doesn't speculative read from WC #else -// enter each page's physical address in the target map + // enter each page's physical address in the target map for (off = 0; off < length; off += page_size) - pmap_enter(pmap, va + off, (pa + off) >> 12, prot, flags, TRUE); + { + ppnum_t ppnum = pmap_find_phys(pmap, va + off); + if (ppnum) + pmap_enter(pmap, va + off, ppnum, prot, flags, TRUE); + } #endif - return( KERN_SUCCESS ); + return (KERN_SUCCESS); } -kern_return_t IOUnmapPages(vm_map_t map, vm_offset_t va, vm_size_t length) +ppnum_t IOGetLastPageNumber(void) { - pmap_t pmap = map->pmap; + ppnum_t lastPage, highest = 0; - pmap_remove(pmap, trunc_page_64(va), round_page_64(va + length)); - - return( KERN_SUCCESS ); +#if __ppc__ + int idx; + for (idx = 0; idx < pmap_mem_regions_count; idx++) + { + lastPage = pmap_mem_regions[idx].mrEnd; +#elif __i386__ + unsigned int idx; + for (idx = 0; idx < pmap_memory_region_count; idx++) + { + lastPage = pmap_memory_regions[idx].end - 1; +#else +#error arch +#endif + if (lastPage > highest) + highest = lastPage; + } + return (highest); } + void IOGetTime( mach_timespec_t * clock_time); void IOGetTime( 
mach_timespec_t * clock_time) { clock_get_system_nanotime(&clock_time->tv_sec, &clock_time->tv_nsec); } + diff --git a/osfmk/i386/AT386/bbclock.c b/osfmk/i386/AT386/bbclock.c deleted file mode 100644 index f52b3694b..000000000 --- a/osfmk/i386/AT386/bbclock.c +++ /dev/null @@ -1,271 +0,0 @@ -/* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. - * - * @APPLE_LICENSE_HEADER_START@ - * - * The contents of this file constitute Original Code as defined in and - * are subject to the Apple Public Source License Version 1.1 (the - * "License"). You may not use this file except in compliance with the - * License. Please obtain a copy of the License at - * http://www.apple.com/publicsource and read it before using this file. - * - * This Original Code and all software distributed under the License are - * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the - * License for the specific language governing rights and limitations - * under the License. - * - * @APPLE_LICENSE_HEADER_END@ - */ -/* - * @OSF_COPYRIGHT@ - */ -/* - Copyright 1988, 1989 by Intel Corporation, Santa Clara, California. - - All Rights Reserved - -Permission to use, copy, modify, and distribute this software and -its documentation for any purpose and without fee is hereby -granted, provided that the above copyright notice appears in all -copies and that both the copyright notice and this permission notice -appear in supporting documentation, and that the name of Intel -not be used in advertising or publicity pertaining to distribution -of the software without specific, written prior permission. 
- -INTEL DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE -INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, -IN NO EVENT SHALL INTEL BE LIABLE FOR ANY SPECIAL, INDIRECT, OR -CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM -LOSS OF USE, DATA OR PROFITS, WHETHER IN ACTION OF CONTRACT, -NEGLIGENCE, OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION -WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. -*/ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -/* local data */ -static int month[12] = {31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31}; - -extern char dectohexdec( - int n); -extern int hexdectodec( - char c); -extern int yeartoday( - int yr); -extern void rtcput( - struct rtc_st * regs); -extern int rtcget( - struct rtc_st * regs); - -#define LOCK_BBC() splclock() -#define UNLOCK_BBC(s) splx(s) - -/* - * Configure battery-backed clock. - */ -int -bbc_config(void) -{ - int BbcFlag; - struct rtc_st rtclk; - - mp_disable_preemption(); - if (cpu_number() != master_cpu) { - mp_enable_preemption(); - return(1); - } - - /* - * Setup device. - */ - outb(RTC_ADDR, RTC_A); - outb(RTC_DATA, RTC_DIV2 | RTC_RATE6); - outb(RTC_ADDR, RTC_B); - outb(RTC_DATA, RTC_HM); - - /* - * Probe the device by trying to read it. - */ - BbcFlag = (rtcget(&rtclk) ? 0 : 1); - if (BbcFlag) - printf("battery clock configured\n"); - else - printf("WARNING: Battery Clock Failure!\n"); - mp_enable_preemption(); - return (BbcFlag); -} - -/* - * Get the current clock time. 
- */ -kern_return_t -bbc_gettime( - mach_timespec_t *cur_time) /* OUT */ -{ - struct rtc_st rtclk; - time_t n; - int sec, min, hr, dom, mon, yr; - int i, days = 0; - spl_t s; - thread_t thread; - - if ((thread = current_thread()) != THREAD_NULL) { - thread_bind(thread, master_processor); - mp_disable_preemption(); - if (current_processor() != master_processor) { - mp_enable_preemption(); - thread_block(THREAD_CONTINUE_NULL); - } else { - mp_enable_preemption(); - } - } - - s = LOCK_BBC(); - rtcget(&rtclk); - sec = hexdectodec(rtclk.rtc_sec); - min = hexdectodec(rtclk.rtc_min); - hr = hexdectodec(rtclk.rtc_hr); - dom = hexdectodec(rtclk.rtc_dom); - mon = hexdectodec(rtclk.rtc_mon); - yr = hexdectodec(rtclk.rtc_yr); - yr = (yr < 70) ? yr+100 : yr; - n = sec + 60 * min + 3600 * hr; - n += (dom - 1) * 3600 * 24; - if (yeartoday(yr) == 366) - month[1] = 29; - for (i = mon - 2; i >= 0; i--) - days += month[i]; - month[1] = 28; - for (i = 70; i < yr; i++) - days += yeartoday(i); - n += days * 3600 * 24; - cur_time->tv_sec = n; - cur_time->tv_nsec = 0; - UNLOCK_BBC(s); - - if (thread != THREAD_NULL) - thread_bind(thread, PROCESSOR_NULL); - return (KERN_SUCCESS); -} - -/* - * Set the current clock time. 
- */ -kern_return_t -bbc_settime( - mach_timespec_t *new_time) -{ - struct rtc_st rtclk; - time_t n; - int diff, i, j; - spl_t s; - thread_t thread; - - if ((thread = current_thread()) != THREAD_NULL) { - thread_bind(thread, master_processor); - mp_disable_preemption(); - if (current_processor() != master_processor) { - mp_enable_preemption(); - thread_block(THREAD_CONTINUE_NULL); - } else { - mp_enable_preemption(); - } - } - - s = LOCK_BBC(); - rtcget(&rtclk); - diff = 0; - n = (new_time->tv_sec - diff) % (3600 * 24); /* hrs+mins+secs */ - rtclk.rtc_sec = dectohexdec(n%60); - n /= 60; - rtclk.rtc_min = dectohexdec(n%60); - rtclk.rtc_hr = dectohexdec(n/60); - n = (new_time->tv_sec - diff) / (3600 * 24); /* days */ - rtclk.rtc_dow = (n + 4) % 7; /* 1/1/70 is Thursday */ - for (j = 70; n >= (i = yeartoday(j)); j++) - n -= i; - rtclk.rtc_yr = dectohexdec(j % 100); - if (yeartoday(j) == 366) - month[1] = 29; - for (i = 0; n >= month[i]; i++) - n -= month[i]; - month[1] = 28; - rtclk.rtc_mon = dectohexdec(++i); - rtclk.rtc_dom = dectohexdec(++n); - rtcput(&rtclk); - UNLOCK_BBC(s); - - if (thread != THREAD_NULL) - thread_bind(current_thread(), PROCESSOR_NULL); - - return (KERN_SUCCESS); -} - - -/* DEVICE SPECIFIC ROUTINES */ - -int -rtcget( - struct rtc_st * regs) -{ - outb(RTC_ADDR, RTC_D); - if ((inb(RTC_DATA) & RTC_VRT) == 0) - return (-1); - outb(RTC_ADDR, RTC_A); - while (inb(RTC_DATA) & RTC_UIP) /* busy wait */ - outb(RTC_ADDR, RTC_A); - load_rtc((unsigned char *)regs); - return (0); -} - -void -rtcput( - struct rtc_st * regs) -{ - register unsigned char x; - - outb(RTC_ADDR, RTC_B); - x = inb(RTC_DATA); - outb(RTC_ADDR, RTC_B); - outb(RTC_DATA, x | RTC_SET); - save_rtc((unsigned char *)regs); - outb(RTC_ADDR, RTC_B); - outb(RTC_DATA, x & ~RTC_SET); -} - -int -yeartoday( - int year) -{ - year += 1900; - return((year % 4) ? 365 : - ((year % 100) ? 366 : ((year % 400) ? 
365: 366))); -} - -int -hexdectodec( - char n) -{ - return ((((n >> 4) & 0x0F) * 10) + (n & 0x0F)); -} - -char -dectohexdec( - int n) -{ - return ((char)(((n / 10) << 4) & 0xF0) | ((n % 10) & 0x0F)); -} diff --git a/osfmk/i386/AT386/conf.c b/osfmk/i386/AT386/conf.c index 8001faf2f..3be143d73 100644 --- a/osfmk/i386/AT386/conf.c +++ b/osfmk/i386/AT386/conf.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -72,9 +72,9 @@ extern struct clock_ops sysclk_ops, calend_ops; struct clock clock_list[] = { /* SYSTEM_CLOCK */ - { &sysclk_ops, 0, 0, {0} }, + { &sysclk_ops, 0, 0 }, /* CALENDAR_CLOCK */ - { &calend_ops, 0, 0, {0} } + { &calend_ops, 0, 0 } }; int clock_count = sizeof(clock_list) / sizeof(clock_list[0]); diff --git a/osfmk/i386/AT386/misc_protos.h b/osfmk/i386/AT386/misc_protos.h index bd38951bd..8c6c7e604 100644 --- a/osfmk/i386/AT386/misc_protos.h +++ b/osfmk/i386/AT386/misc_protos.h @@ -33,7 +33,6 @@ */ extern void i386_init(void); -extern void i386_vm_init(unsigned int maxmem, KernelBootArgs_t *args); extern void machine_init(void); extern void machine_startup(void); @@ -44,13 +43,6 @@ extern void machine_startup(void); extern void cninit(void); extern void kdreboot(void); -/* - * i386/locore.s - */ - -extern void kdb_kintr(void); -extern void kgdb_kintr(void); - /* * i386/db_interface.c */ diff --git a/osfmk/i386/AT386/model_dep.c b/osfmk/i386/AT386/model_dep.c index 588e59611..938f59992 100644 --- a/osfmk/i386/AT386/model_dep.c +++ b/osfmk/i386/AT386/model_dep.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved. 
* * @APPLE_LICENSE_HEADER_START@ * @@ -62,7 +62,6 @@ #include #include -#include #include @@ -81,20 +80,33 @@ #include #include #include -#include #include -#include #include +#include +#include #include +#include /* inb() */ #include #if MACH_KDB #include #endif /* MACH_KDB */ +#include +#include +#include + #include #include #include +#include + +#include + +#include +#include +#include +#include void enable_bluebox(void); void disable_bluebox(void); @@ -108,6 +120,27 @@ extern int max_poll_quanta; extern int idlehalt; extern unsigned int panic_is_inited; +int db_run_mode; + +static int packAsc (uint8_t *inbuf, unsigned int length); +extern int kdb_printf(const char *fmt, ...); + +volatile int pbtcpu = -1; +hw_lock_data_t pbtlock; /* backtrace print lock */ +uint32_t pbtcnt = 0; + +extern const char version[]; + +typedef struct _cframe_t { + struct _cframe_t *prev; + unsigned caller; + unsigned args[0]; +} cframe_t; + +void panic_i386_backtrace(void *_frame, int nframes); + +unsigned panic_io_port = 0; + void machine_startup() { @@ -128,8 +161,8 @@ machine_startup() #if NOTYET hw_lock_init(&debugger_lock); /* initialize debugger lock */ - hw_lock_init(&pbtlock); /* initialize print backtrace lock */ #endif + hw_lock_init(&pbtlock); /* initialize print backtrace lock */ #if MACH_KDB /* @@ -170,6 +203,32 @@ machine_startup() if (PE_parse_boot_arg("idlehalt", &boot_arg)) { idlehalt = boot_arg; } +/* The I/O port to issue a read from, in the event of a panic. Useful for + * triggering logic analyzers. + */ + if (PE_parse_boot_arg("panic_io_port", &boot_arg)) { + /*I/O ports range from 0 through 0xFFFF */ + panic_io_port = boot_arg & 0xffff; + } + +/* + * fn is used to force napping. 
+ * fn=0 means no napping allowed + * fn=1 means forces napping on, normal C2 and C4 transitions + * fn=2 means forces napping on, but C4 is disabled + * fn=3 means forces napping on, but use halt + * fn=4 means forces napping on and will always use C4 + * + * Note that this will take effect only when the system normally starts napping. + * + */ + + if (!PE_parse_boot_arg("fn", &forcenap)) forcenap = 0; /* If force nap not set, make 0 */ + else { + if(forcenap < 5) forcenap = forcenap + 1; /* See comments above for decode, this is set to fn + 1 */ + else forcenap = 0; /* Clear for error case */ + } + machine_nap_policy(); /* Make sure the nap policy reflects the user's choice */ machine_conf(); @@ -191,18 +250,363 @@ machine_conf(void) machine_info.memory_size = mem_size; } + +extern void *gPEEFIRuntimeServices; +extern void *gPEEFISystemTable; + +/*- + * COPYRIGHT (C) 1986 Gary S. Brown. You may use this program, or + * code or tables extracted from it, as desired without restriction. + * + * First, the polynomial itself and its table of feedback terms. The + * polynomial is + * X^32+X^26+X^23+X^22+X^16+X^12+X^11+X^10+X^8+X^7+X^5+X^4+X^2+X^1+X^0 + * + * Note that we take it "backwards" and put the highest-order term in + * the lowest-order bit. The X^32 term is "implied"; the LSB is the + * X^31 term, etc. The X^0 term (usually shown as "+1") results in + * the MSB being 1 + * + * Note that the usual hardware shift register implementation, which + * is what we're using (we're merely optimizing it by doing eight-bit + * chunks at a time) shifts bits into the lowest-order term. In our + * implementation, that means shifting towards the right. Why do we + * do it this way? Because the calculated CRC must be transmitted in + * order from highest-order term to lowest-order term. UARTs transmit + * characters in order from LSB to MSB. 
By storing the CRC this way + * we hand it to the UART in the order low-byte to high-byte; the UART + * sends each low-bit to hight-bit; and the result is transmission bit + * by bit from highest- to lowest-order term without requiring any bit + * shuffling on our part. Reception works similarly + * + * The feedback terms table consists of 256, 32-bit entries. Notes + * + * The table can be generated at runtime if desired; code to do so + * is shown later. It might not be obvious, but the feedback + * terms simply represent the results of eight shift/xor opera + * tions for all combinations of data and CRC register values + * + * The values must be right-shifted by eight bits by the "updcrc + * logic; the shift must be unsigned (bring in zeroes). On some + * hardware you could probably optimize the shift in assembler by + * using byte-swap instructions + * polynomial $edb88320 + * + * + * CRC32 code derived from work by Gary S. Brown. + */ + +static uint32_t crc32_tab[] = { + 0x00000000, 0x77073096, 0xee0e612c, 0x990951ba, 0x076dc419, 0x706af48f, + 0xe963a535, 0x9e6495a3, 0x0edb8832, 0x79dcb8a4, 0xe0d5e91e, 0x97d2d988, + 0x09b64c2b, 0x7eb17cbd, 0xe7b82d07, 0x90bf1d91, 0x1db71064, 0x6ab020f2, + 0xf3b97148, 0x84be41de, 0x1adad47d, 0x6ddde4eb, 0xf4d4b551, 0x83d385c7, + 0x136c9856, 0x646ba8c0, 0xfd62f97a, 0x8a65c9ec, 0x14015c4f, 0x63066cd9, + 0xfa0f3d63, 0x8d080df5, 0x3b6e20c8, 0x4c69105e, 0xd56041e4, 0xa2677172, + 0x3c03e4d1, 0x4b04d447, 0xd20d85fd, 0xa50ab56b, 0x35b5a8fa, 0x42b2986c, + 0xdbbbc9d6, 0xacbcf940, 0x32d86ce3, 0x45df5c75, 0xdcd60dcf, 0xabd13d59, + 0x26d930ac, 0x51de003a, 0xc8d75180, 0xbfd06116, 0x21b4f4b5, 0x56b3c423, + 0xcfba9599, 0xb8bda50f, 0x2802b89e, 0x5f058808, 0xc60cd9b2, 0xb10be924, + 0x2f6f7c87, 0x58684c11, 0xc1611dab, 0xb6662d3d, 0x76dc4190, 0x01db7106, + 0x98d220bc, 0xefd5102a, 0x71b18589, 0x06b6b51f, 0x9fbfe4a5, 0xe8b8d433, + 0x7807c9a2, 0x0f00f934, 0x9609a88e, 0xe10e9818, 0x7f6a0dbb, 0x086d3d2d, + 0x91646c97, 0xe6635c01, 0x6b6b51f4, 
0x1c6c6162, 0x856530d8, 0xf262004e, + 0x6c0695ed, 0x1b01a57b, 0x8208f4c1, 0xf50fc457, 0x65b0d9c6, 0x12b7e950, + 0x8bbeb8ea, 0xfcb9887c, 0x62dd1ddf, 0x15da2d49, 0x8cd37cf3, 0xfbd44c65, + 0x4db26158, 0x3ab551ce, 0xa3bc0074, 0xd4bb30e2, 0x4adfa541, 0x3dd895d7, + 0xa4d1c46d, 0xd3d6f4fb, 0x4369e96a, 0x346ed9fc, 0xad678846, 0xda60b8d0, + 0x44042d73, 0x33031de5, 0xaa0a4c5f, 0xdd0d7cc9, 0x5005713c, 0x270241aa, + 0xbe0b1010, 0xc90c2086, 0x5768b525, 0x206f85b3, 0xb966d409, 0xce61e49f, + 0x5edef90e, 0x29d9c998, 0xb0d09822, 0xc7d7a8b4, 0x59b33d17, 0x2eb40d81, + 0xb7bd5c3b, 0xc0ba6cad, 0xedb88320, 0x9abfb3b6, 0x03b6e20c, 0x74b1d29a, + 0xead54739, 0x9dd277af, 0x04db2615, 0x73dc1683, 0xe3630b12, 0x94643b84, + 0x0d6d6a3e, 0x7a6a5aa8, 0xe40ecf0b, 0x9309ff9d, 0x0a00ae27, 0x7d079eb1, + 0xf00f9344, 0x8708a3d2, 0x1e01f268, 0x6906c2fe, 0xf762575d, 0x806567cb, + 0x196c3671, 0x6e6b06e7, 0xfed41b76, 0x89d32be0, 0x10da7a5a, 0x67dd4acc, + 0xf9b9df6f, 0x8ebeeff9, 0x17b7be43, 0x60b08ed5, 0xd6d6a3e8, 0xa1d1937e, + 0x38d8c2c4, 0x4fdff252, 0xd1bb67f1, 0xa6bc5767, 0x3fb506dd, 0x48b2364b, + 0xd80d2bda, 0xaf0a1b4c, 0x36034af6, 0x41047a60, 0xdf60efc3, 0xa867df55, + 0x316e8eef, 0x4669be79, 0xcb61b38c, 0xbc66831a, 0x256fd2a0, 0x5268e236, + 0xcc0c7795, 0xbb0b4703, 0x220216b9, 0x5505262f, 0xc5ba3bbe, 0xb2bd0b28, + 0x2bb45a92, 0x5cb36a04, 0xc2d7ffa7, 0xb5d0cf31, 0x2cd99e8b, 0x5bdeae1d, + 0x9b64c2b0, 0xec63f226, 0x756aa39c, 0x026d930a, 0x9c0906a9, 0xeb0e363f, + 0x72076785, 0x05005713, 0x95bf4a82, 0xe2b87a14, 0x7bb12bae, 0x0cb61b38, + 0x92d28e9b, 0xe5d5be0d, 0x7cdcefb7, 0x0bdbdf21, 0x86d3d2d4, 0xf1d4e242, + 0x68ddb3f8, 0x1fda836e, 0x81be16cd, 0xf6b9265b, 0x6fb077e1, 0x18b74777, + 0x88085ae6, 0xff0f6a70, 0x66063bca, 0x11010b5c, 0x8f659eff, 0xf862ae69, + 0x616bffd3, 0x166ccf45, 0xa00ae278, 0xd70dd2ee, 0x4e048354, 0x3903b3c2, + 0xa7672661, 0xd06016f7, 0x4969474d, 0x3e6e77db, 0xaed16a4a, 0xd9d65adc, + 0x40df0b66, 0x37d83bf0, 0xa9bcae53, 0xdebb9ec5, 0x47b2cf7f, 0x30b5ffe9, + 0xbdbdf21c, 0xcabac28a, 0x53b39330, 
0x24b4a3a6, 0xbad03605, 0xcdd70693, + 0x54de5729, 0x23d967bf, 0xb3667a2e, 0xc4614ab8, 0x5d681b02, 0x2a6f2b94, + 0xb40bbe37, 0xc30c8ea1, 0x5a05df1b, 0x2d02ef8d +}; + +static uint32_t +crc32(uint32_t crc, const void *buf, size_t size) +{ + const uint8_t *p; + + p = buf; + crc = crc ^ ~0U; + + while (size--) + crc = crc32_tab[(crc ^ *p++) & 0xFF] ^ (crc >> 8); + + return crc ^ ~0U; +} + +static void +efi_set_tables_64(EFI_SYSTEM_TABLE_64 * system_table) +{ + EFI_RUNTIME_SERVICES_64 *runtime; + uint32_t hdr_cksum; + uint32_t cksum; + + kprintf("Processing 64-bit EFI tables at 0x%x\n", (unsigned int)system_table); + do { + if (system_table->Hdr.Signature != EFI_SYSTEM_TABLE_SIGNATURE) { + kprintf("Bad EFI system table signature\n"); + break; + } + // Verify signature of the system table + hdr_cksum = system_table->Hdr.CRC32; + system_table->Hdr.CRC32 = 0; + cksum = crc32(0L, system_table, system_table->Hdr.HeaderSize); + + //kprintf("System table calculated CRC32 = 0x%x, header = 0x%x\n", cksum, hdr_cksum); + system_table->Hdr.CRC32 = hdr_cksum; + if (cksum != hdr_cksum) { + kprintf("Bad EFI system table checksum\n"); + break; + } + + gPEEFISystemTable = system_table; + + kprintf("RuntimeServices table at 0x%qx\n", system_table->RuntimeServices); + runtime = (EFI_RUNTIME_SERVICES_64 *) (uint32_t)system_table->RuntimeServices; // XXX + kprintf("Checking runtime services table 0x%x\n", runtime); + if (runtime->Hdr.Signature != EFI_RUNTIME_SERVICES_SIGNATURE) { + kprintf("Bad EFI runtime table signature\n"); + break; + } + + // Verify signature of runtime services table + hdr_cksum = runtime->Hdr.CRC32; + runtime->Hdr.CRC32 = 0; + cksum = crc32(0L, runtime, runtime->Hdr.HeaderSize); + + //kprintf("Runtime table calculated CRC32 = 0x%x, header = 0x%x\n", cksum, hdr_cksum); + runtime->Hdr.CRC32 = hdr_cksum; + if (cksum != hdr_cksum) { + kprintf("Bad EFI runtime table checksum\n"); + break; + } + + gPEEFIRuntimeServices = runtime; + } + while (FALSE); +} + +static void 
+efi_set_tables_32(EFI_SYSTEM_TABLE * system_table) +{ + EFI_RUNTIME_SERVICES *runtime; + uint32_t hdr_cksum; + uint32_t cksum; + + kprintf("Processing 32-bit EFI tables at 0x%x\n", (unsigned int)system_table); + do { + if (system_table->Hdr.Signature != EFI_SYSTEM_TABLE_SIGNATURE) { + kprintf("Bad EFI system table signature\n"); + break; + } + // Verify signature of the system table + hdr_cksum = system_table->Hdr.CRC32; + system_table->Hdr.CRC32 = 0; + cksum = crc32(0L, system_table, system_table->Hdr.HeaderSize); + + //kprintf("System table calculated CRC32 = 0x%x, header = 0x%x\n", cksum, hdr_cksum); + system_table->Hdr.CRC32 = hdr_cksum; + if (cksum != hdr_cksum) { + kprintf("Bad EFI system table checksum\n"); + break; + } + + gPEEFISystemTable = system_table; + + runtime = (EFI_RUNTIME_SERVICES *) system_table->RuntimeServices; + if (runtime->Hdr.Signature != EFI_RUNTIME_SERVICES_SIGNATURE) { + kprintf("Bad EFI runtime table signature\n"); + break; + } + + // Verify signature of runtime services table + hdr_cksum = runtime->Hdr.CRC32; + runtime->Hdr.CRC32 = 0; + cksum = crc32(0L, runtime, runtime->Hdr.HeaderSize); + + //kprintf("Runtime table calculated CRC32 = 0x%x, header = 0x%x\n", cksum, hdr_cksum); + runtime->Hdr.CRC32 = hdr_cksum; + if (cksum != hdr_cksum) { + kprintf("Bad EFI runtime table checksum\n"); + break; + } + + gPEEFIRuntimeServices = runtime; + } + while (FALSE); +} + + +/* Map in EFI runtime areas. 
*/ +static void +efi_init(void) +{ + boot_args *args = (boot_args *)PE_state.bootArgs; + + kprintf("Initializing EFI runtime services\n"); + + do + { + vm_offset_t vm_size, vm_addr; + vm_map_offset_t phys_addr; + EfiMemoryRange *mptr; + unsigned int msize, mcount; + unsigned int i; + + msize = args->MemoryMapDescriptorSize; + mcount = args->MemoryMapSize / msize; + + mptr = (EfiMemoryRange *)args->MemoryMap; + for (i=0; i < mcount; i++, mptr = (EfiMemoryRange *)(((vm_offset_t)mptr) + msize)) { + if (((mptr->Attribute & EFI_MEMORY_RUNTIME) == EFI_MEMORY_RUNTIME) ) { + vm_size = i386_ptob((uint32_t)mptr->NumberOfPages); + vm_addr = (vm_offset_t) mptr->VirtualStart; + phys_addr = (vm_map_offset_t) mptr->PhysicalStart; + pmap_map(vm_addr, phys_addr, phys_addr + round_page(vm_size), + (mptr->Type == kEfiRuntimeServicesCode) ? VM_PROT_READ | VM_PROT_EXECUTE : VM_PROT_READ|VM_PROT_WRITE, + (mptr->Type == EfiMemoryMappedIO) ? VM_WIMG_IO : VM_WIMG_USE_DEFAULT); + } + } + + if (args->Version > 1) + panic("Incompatible boot args version %d\n", args->Version); + + kprintf("Boot args version %d revision %d mode %d\n", args->Version, args->Revision, args->efiMode); + if (args->Revision >= 4 && args->efiMode == kBootArgsEfiMode64) { + efi_set_tables_64((EFI_SYSTEM_TABLE_64 *) args->efiSystemTable); + } else { + efi_set_tables_32((EFI_SYSTEM_TABLE *) args->efiSystemTable); + } + } + while (FALSE); + + return; +} + +/* Remap EFI runtime areas. 
*/ +void +hibernate_newruntime_map(void * map, vm_size_t map_size, uint32_t system_table_offset) +{ + boot_args *args = (boot_args *)PE_state.bootArgs; + + kprintf("Reinitializing EFI runtime services\n"); + + if (args->Revision < 3) + return; + do + { + vm_offset_t vm_size, vm_addr; + vm_map_offset_t phys_addr; + EfiMemoryRange *mptr; + unsigned int msize, mcount; + unsigned int i; + + gPEEFISystemTable = 0; + gPEEFIRuntimeServices = 0; + + system_table_offset += ptoa_32(args->efiRuntimeServicesPageStart); + + kprintf("Old system table %p, new %p\n", + args->efiSystemTable, (void *) system_table_offset); + + args->efiSystemTable = (uint32_t) system_table_offset; + + kprintf("Old map:\n"); + msize = args->MemoryMapDescriptorSize; + mcount = args->MemoryMapSize / msize; + mptr = (EfiMemoryRange *)args->MemoryMap; + for (i=0; i < mcount; i++, mptr = (EfiMemoryRange *)(((vm_offset_t)mptr) + msize)) { + if ((mptr->Attribute & EFI_MEMORY_RUNTIME) == EFI_MEMORY_RUNTIME) { + + vm_size = i386_ptob((uint32_t)mptr->NumberOfPages); + vm_addr = (vm_offset_t) mptr->VirtualStart; + phys_addr = (vm_map_offset_t) mptr->PhysicalStart; + + kprintf("mapping[%d] %qx @ %x, %x\n", mptr->Type, phys_addr, vm_addr, mptr->NumberOfPages); + } + } + + pmap_remove(kernel_pmap, i386_ptob(args->efiRuntimeServicesPageStart), + i386_ptob(args->efiRuntimeServicesPageStart + args->efiRuntimeServicesPageCount)); + + kprintf("New map:\n"); + msize = args->MemoryMapDescriptorSize; + mcount = map_size / msize; + mptr = map; + for (i=0; i < mcount; i++, mptr = (EfiMemoryRange *)(((vm_offset_t)mptr) + msize)) { + if ((mptr->Attribute & EFI_MEMORY_RUNTIME) == EFI_MEMORY_RUNTIME) { + + vm_size = i386_ptob((uint32_t)mptr->NumberOfPages); + vm_addr = (vm_offset_t) mptr->VirtualStart; + phys_addr = (vm_map_offset_t) mptr->PhysicalStart; + + kprintf("mapping[%d] %qx @ %x, %x\n", mptr->Type, phys_addr, vm_addr, mptr->NumberOfPages); + + pmap_map(vm_addr, phys_addr, phys_addr + round_page(vm_size), + (mptr->Type 
== kEfiRuntimeServicesCode) ? VM_PROT_READ | VM_PROT_EXECUTE : VM_PROT_READ|VM_PROT_WRITE, + (mptr->Type == EfiMemoryMappedIO) ? VM_WIMG_IO : VM_WIMG_USE_DEFAULT); + } + } + + if (args->Version > 1) + panic("Incompatible boot args version %d\n", args->Version); + + kprintf("Boot args version %d revision %d mode %d\n", args->Version, args->Revision, args->efiMode); + if (args->Revision >= 4 && args->efiMode == kBootArgsEfiMode64) { + efi_set_tables_64((EFI_SYSTEM_TABLE_64 *) args->efiSystemTable); + } else { + efi_set_tables_32((EFI_SYSTEM_TABLE *) args->efiSystemTable); + } + } + while (FALSE); + + kprintf("Done reinitializing EFI runtime services\n"); + + return; +} + /* * Find devices. The system is alive. */ void machine_init(void) { + /* Ensure panic buffer is initialized. */ + debug_log_init(); + /* * Display CPU identification */ - cpuid_cpu_display("CPU identification", 0); - cpuid_feature_display("CPU features", 0); + cpuid_cpu_display("CPU identification"); + cpuid_feature_display("CPU features"); + cpuid_extfeature_display("CPU extended features"); + /* + * Initialize EFI runtime services. + */ + efi_init(); smp_init(); @@ -250,14 +654,6 @@ void halt_all_cpus(boolean_t reboot) { if (reboot) { - /* - * Tell the BIOS not to clear and test memory. 
- */ -#if 0 /* XXX fixme */ - if (!reset_mem_on_reboot) - *(unsigned short *)phystokv(0x472) = 0x1234; -#endif - printf("MACH Reboot\n"); PEHaltRestart( kPERestartCPU ); } else { @@ -267,37 +663,118 @@ halt_all_cpus(boolean_t reboot) while(1); } -/*XXX*/ -void fc_get(mach_timespec_t *ts); -#include -extern kern_return_t sysclk_gettime( - mach_timespec_t *cur_time); -void fc_get(mach_timespec_t *ts) { - (void )sysclk_gettime(ts); +/* For use with the MP rendezvous mechanism + */ + +static void +machine_halt_cpu(__unused void *arg) { + __asm__ volatile("hlt"); } void Debugger( const char *message) { + unsigned long pi_size = 0; + void *stackptr; + hw_atomic_add(&debug_mode, 1); if (!panic_is_inited) { postcode(PANIC_HLT); asm("hlt"); } + printf("Debugger called: <%s>\n", message); kprintf("Debugger called: <%s>\n", message); - draw_panic_dialog(); + /* + * Skip the graphical panic box if no panic string. + * This is the case if we're being called from + * host_reboot(,HOST_REBOOT_DEBUGGER) + * as a quiet way into the debugger. + */ + + if (panicstr) { + disable_preemption(); + +/* Issue an I/O port read if one has been requested - this is an event logic + * analyzers can use as a trigger point. + */ + if (panic_io_port) + (void)inb(panic_io_port); + + /* Obtain current frame pointer */ + __asm__ volatile("movl %%ebp, %0" : "=m" (stackptr)); + + /* Print backtrace - callee is internally synchronized */ + panic_i386_backtrace(stackptr, 16); + + /* everything should be printed now so copy to NVRAM + */ + + if( debug_buf_size > 0) { + /* Do not compress the panic log + * or save to NVRAM unless kernel debugging + * is disabled. The NVRAM shim doesn't + * sync to the store until haltRestart is called. 
+ */ + if (!panicDebugging) { + unsigned int bufpos; + + debug_putc(0); + + /* Now call the compressor */ + /* XXX Consider using the WKdm compressor in the + * future, rather than just packing - would need to + * be co-ordinated with crashreporter, which decodes + * this post-restart. + */ + bufpos = packAsc ((uint8_t *)debug_buf, + (unsigned int) (debug_buf_ptr - debug_buf) ); + /* If compression was successful, + * use the compressed length + */ + if (bufpos) { + debug_buf_ptr = debug_buf + bufpos; + } + /* Save panic log to non-volatile store + * Panic info handler must truncate data that is + * too long for this platform. + * This call must save data synchronously, + * since we can subsequently halt the system. + */ + pi_size = debug_buf_ptr - debug_buf; + pi_size = PESavePanicInfo((unsigned char *)debug_buf, + pi_size ); + } + } + draw_panic_dialog(); + + if (!panicDebugging) { + /* Clear the MP rendezvous function lock, in the event + * that a panic occurred while in that codepath. + */ + mp_rendezvous_break_lock(); + /* Force all CPUs to disable interrupts and HLT. + * We've panicked, and shouldn't depend on the + * PEHaltRestart() mechanism, which relies on several + * bits of infrastructure. 
+ */ + mp_rendezvous_no_intrs(machine_halt_cpu, NULL); + /* NOT REACHED */ + } + } __asm__("int3"); + hw_atomic_sub(&debug_mode, 1); } void enable_bluebox(void) { } + void disable_bluebox(void) { @@ -310,3 +787,257 @@ machine_boot_info(char *buf, __unused vm_size_t size) return buf; } + +struct pasc { + unsigned a: 7; + unsigned b: 7; + unsigned c: 7; + unsigned d: 7; + unsigned e: 7; + unsigned f: 7; + unsigned g: 7; + unsigned h: 7; +} __attribute__((packed)); + +typedef struct pasc pasc_t; + +static int packAsc (unsigned char *inbuf, unsigned int length) +{ + unsigned int i, j = 0; + unsigned int extra; + pasc_t pack; + + for (i = 0; i < length; i+=8) + { + pack.a = inbuf[i]; + pack.b = inbuf[i+1]; + pack.c = inbuf[i+2]; + pack.d = inbuf[i+3]; + pack.e = inbuf[i+4]; + pack.f = inbuf[i+5]; + pack.g = inbuf[i+6]; + pack.h = inbuf[i+7]; + bcopy ((char *) &pack, inbuf + j, 7); + j += 7; + } + extra = (i - length); + if (extra > 0) { + inbuf[j - extra] &= (0xFF << (8-extra)); + } + return j-((extra == 7) ? 6 : extra); +} + +/* Routines for address - symbol translation. Not called unless the "keepsyms" + * boot-arg is supplied. 
+ */ + +static int +panic_print_macho_symbol_name(struct mach_header *mh, vm_address_t search) +{ + struct nlist *sym = NULL; + struct load_command *cmd; + struct segment_command *orig_ts = NULL, *orig_le = NULL; + struct symtab_command *orig_st = NULL; + unsigned int i; + char *strings, *bestsym = NULL; + vm_address_t bestaddr = 0, diff, curdiff; + + if (mh->magic != MH_MAGIC) { + /* bad magic number */ + return 0; + } + + cmd = (struct load_command *) &mh[1]; + for (i = 0; i < mh->ncmds; i++) { + if (cmd->cmd == LC_SEGMENT) { + struct segment_command *orig_sg = (struct segment_command *) cmd; + + if (strcmp(SEG_TEXT, orig_sg->segname) == 0) + orig_ts = orig_sg; + else if (strcmp(SEG_LINKEDIT, orig_sg->segname) == 0) + orig_le = orig_sg; + else if (strcmp("", orig_sg->segname) == 0) + orig_ts = orig_sg; /* kexts have a single unnamed segment */ + } + else if (cmd->cmd == LC_SYMTAB) + orig_st = (struct symtab_command *) cmd; + + cmd = (struct load_command *) ((caddr_t) cmd + cmd->cmdsize); + } + + if ((orig_ts == NULL) || (orig_st == NULL) || (orig_le == NULL)) + return 0; + + /* kexts don't have a LINKEDIT segment for now, so we'll never get this far for kexts */ + + vm_address_t slide = ((vm_address_t)mh) - orig_ts->vmaddr; + if (slide != 0) + search -= slide; /* adjusting search since the binary has slid */ + + if ((search < orig_ts->vmaddr) || + (search >= orig_ts->vmaddr + orig_ts->vmsize)) { + /* search out of range for this mach header */ + return 0; + } + + sym = (struct nlist *)orig_le->vmaddr; + strings = ((char *)sym) + orig_st->nsyms * sizeof(struct nlist); + diff = search; + + for (i = 0; i < orig_st->nsyms; i++) { + if (sym[i].n_value <= search) { + curdiff = search - (vm_address_t)sym[i].n_value; + if (curdiff < diff) { + diff = curdiff; + bestaddr = sym[i].n_value; + bestsym = strings + sym[i].n_un.n_strx; + } + } + } + + if (bestsym != NULL) { + if (diff != 0) { + kdb_printf("%s + 0x%08x ", bestsym, diff); + } else { + kdb_printf("%s ", bestsym); + 
} + return 1; + } + return 0; +} + +extern kmod_info_t * kmod; /* the list of modules */ + +static void +panic_print_kmod_symbol_name(vm_address_t search) +{ + kmod_info_t * current_kmod = kmod; + + while (current_kmod != NULL) { + if ((current_kmod->address <= search) && + (current_kmod->address + current_kmod->size > search)) + break; + current_kmod = current_kmod->next; + } + if (current_kmod != NULL) { + /* if kexts had symbol table loaded, we'd call search_symbol_name again; alas, they don't */ + kdb_printf("%s + %d ", current_kmod->name, search - current_kmod->address); + } +} + +extern struct mach_header _mh_execute_header; /* the kernel's mach header */ + +static void +panic_print_symbol_name(vm_address_t search) +{ + /* try searching in the kernel */ + if (panic_print_macho_symbol_name(&_mh_execute_header, search) == 0) { + /* that failed, now try to search for the right kext */ + panic_print_kmod_symbol_name(search); + } +} + +/* Generate a backtrace, given a frame pointer - this routine + * should walk the stack safely. The trace is appended to the panic log + * and conditionally, to the console. If the trace contains kernel module + * addresses, display the module name, load address and dependencies. + */ + +#define DUMPFRAMES 32 +#define PBT_TIMEOUT_CYCLES (5 * 1000 * 1000 * 1000ULL) +void +panic_i386_backtrace(void *_frame, int nframes) +{ + cframe_t *frame = (cframe_t *)_frame; + vm_offset_t raddrs[DUMPFRAMES]; + int frame_index; + volatile uint32_t *ppbtcnt = &pbtcnt; + uint64_t bt_tsc_timeout; + boolean_t keepsyms = FALSE; + + if(pbtcpu != cpu_number()) { + hw_atomic_add(&pbtcnt, 1); + /* Spin on print backtrace lock, which serializes output + * Continue anyway if a timeout occurs. 
+ */ + hw_lock_to(&pbtlock, LockTimeOut*100); + pbtcpu = cpu_number(); + } + + PE_parse_boot_arg("keepsyms", &keepsyms); + + kdb_printf("Backtrace, " + "Format - Frame : Return Address (4 potential args on stack) "); + + for (frame_index = 0; frame_index < nframes; frame_index++) { + vm_offset_t curframep = (vm_offset_t) frame; + + if (!curframep) + break; + + if (curframep & 0x3) { + kdb_printf("Unaligned frame\n"); + goto invalid; + } + + if (!kvtophys(curframep) || + !kvtophys(curframep + sizeof(cframe_t))) { + kdb_printf("No mapping exists for frame pointer\n"); + goto invalid; + } + + kdb_printf("\n0x%x : 0x%x ", + frame, frame->caller); + if (frame_index < DUMPFRAMES) + raddrs[frame_index] = frame->caller; + + if (kvtophys((vm_offset_t)&(frame->args[3]))) + kdb_printf("(0x%x 0x%x 0x%x 0x%x) ", + frame->args[0], frame->args[1], + frame->args[2], frame->args[3]); + + /* Display address-symbol translation only if the "keepsyms" + * boot-arg is supplied, since we unload LINKEDIT otherwise. + * This routine is potentially unsafe; also, function + * boundary identification is unreliable after a strip -x. + */ + if (keepsyms) + panic_print_symbol_name((vm_address_t)frame->caller); + + /* Stack grows downward */ + if (frame->prev < frame) { + frame = frame->prev; + goto invalid; + } + frame = frame->prev; + } + + if (frame_index >= nframes) + kdb_printf("\tBacktrace continues...\n"); + + goto out; + +invalid: + kdb_printf("Backtrace terminated-invalid frame pointer 0x%x\n",frame); +out: + + /* Identify kernel modules in the backtrace and display their + * load addresses and dependencies. This routine should walk + * the kmod list safely. + */ + if (frame_index) + kmod_dump((vm_offset_t *)&raddrs[0], frame_index); + + kdb_printf("\nKernel version:\n%s\n\n",version); + + /* Release print backtrace lock, to permit other callers in the + * event of panics on multiple processors.
+ */ + hw_lock_unlock(&pbtlock); + hw_atomic_sub(&pbtcnt, 1); + /* Wait for other processors to complete output + * Timeout and continue after PBT_TIMEOUT_CYCLES. + */ + bt_tsc_timeout = rdtsc64() + PBT_TIMEOUT_CYCLES; + while(*ppbtcnt && (rdtsc64() < bt_tsc_timeout)); +} diff --git a/osfmk/i386/AT386/rtc.h b/osfmk/i386/AT386/rtc.h deleted file mode 100644 index 9dd18e914..000000000 --- a/osfmk/i386/AT386/rtc.h +++ /dev/null @@ -1,226 +0,0 @@ -/* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. - * - * @APPLE_LICENSE_HEADER_START@ - * - * The contents of this file constitute Original Code as defined in and - * are subject to the Apple Public Source License Version 1.1 (the - * "License"). You may not use this file except in compliance with the - * License. Please obtain a copy of the License at - * http://www.apple.com/publicsource and read it before using this file. - * - * This Original Code and all software distributed under the License are - * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the - * License for the specific language governing rights and limitations - * under the License. - * - * @APPLE_LICENSE_HEADER_END@ - */ -/* - * @OSF_COPYRIGHT@ - */ -/* - * HISTORY - * - * Revision 1.1.1.1 1998/09/22 21:05:39 wsanchez - * Import of Mac OS X kernel (~semeria) - * - * Revision 1.1.1.1 1998/03/07 02:25:40 wsanchez - * Import of OSF Mach kernel (~mburg) - * - * Revision 1.1.6.1 1994/09/23 01:47:30 ezf - * change marker to not FREE - * [1994/09/22 21:20:22 ezf] - * - * Revision 1.1.2.3 1993/08/09 19:39:04 dswartz - * Add ANSI prototypes - CR#9523 - * [1993/08/06 17:51:17 dswartz] - * - * Revision 1.1.2.2 1993/06/02 23:21:32 jeffc - * Added to OSF/1 R1.3 from NMK15.0. 
- * [1993/06/02 21:03:17 jeffc] - * - * Revision 1.1 1992/09/30 02:27:20 robert - * Initial revision - * - * $EndLog$ - */ -/* CMU_HIST */ -/* - * Revision 2.7 91/05/14 16:30:03 mrt - * Correcting copyright - * - * Revision 2.6 91/03/16 14:47:03 rpd - * Fixed ioctl definitions for ANSI C. - * [91/02/20 rpd] - * - * Revision 2.5 91/02/05 17:20:25 mrt - * Changed to new Mach copyright - * [91/02/01 17:47:16 mrt] - * - * Revision 2.4 90/11/26 14:51:02 rvb - * jsb bet me to XMK34, sigh ... - * [90/11/26 rvb] - * Synched 2.5 & 3.0 at I386q (r1.5.1.3) & XMK35 (r2.4) - * [90/11/15 rvb] - * - * Revision 1.5.1.2 90/07/27 11:27:06 rvb - * Fix Intel Copyright as per B. Davies authorization. - * [90/07/27 rvb] - * - * Revision 2.2 90/05/03 15:46:11 dbg - * First checkin. - * - * Revision 1.5.1.1 90/01/08 13:29:46 rvb - * Add Intel copyright. - * [90/01/08 rvb] - * - * Revision 1.5 89/09/25 12:27:37 rvb - * File was provided by Intel 9/18/89. - * [89/09/23 rvb] - * - */ -/* CMU_ENDHIST */ -/* - * Mach Operating System - * Copyright (c) 1991,1990,1989 Carnegie Mellon University - * All Rights Reserved. - * - * Permission to use, copy, modify and distribute this software and its - * documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR - * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. 
- * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie Mellon - * the rights to redistribute these changes. - */ -/* - */ - -/* - * Copyright 1988, 1989 by Intel Corporation, Santa Clara, California. - * - * All Rights Reserved - * - * Permission to use, copy, modify, and distribute this software and - * its documentation for any purpose and without fee is hereby - * granted, provided that the above copyright notice appears in all - * copies and that both the copyright notice and this permission notice - * appear in supporting documentation, and that the name of Intel - * not be used in advertising or publicity pertaining to distribution - * of the software without specific, written prior permission. - * - * INTEL DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE - * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, - * IN NO EVENT SHALL INTEL BE LIABLE FOR ANY SPECIAL, INDIRECT, OR - * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM - * LOSS OF USE, DATA OR PROFITS, WHETHER IN ACTION OF CONTRACT, - * NEGLIGENCE, OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION - * WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
- */ - -#define RTC_ADDR 0x70 /* I/O port address for register select */ -#define RTC_DATA 0x71 /* I/O port address for data read/write */ - -/* - * Register A definitions - */ -#define RTC_A 0x0a /* register A address */ -#define RTC_UIP 0x80 /* Update in progress bit */ -#define RTC_DIV0 0x00 /* Time base of 4.194304 MHz */ -#define RTC_DIV1 0x10 /* Time base of 1.048576 MHz */ -#define RTC_DIV2 0x20 /* Time base of 32.768 KHz */ -#define RTC_RATE6 0x06 /* interrupt rate of 976.562 */ - -/* - * Register B definitions - */ -#define RTC_B 0x0b /* register B address */ -#define RTC_SET 0x80 /* stop updates for time set */ -#define RTC_PIE 0x40 /* Periodic interrupt enable */ -#define RTC_AIE 0x20 /* Alarm interrupt enable */ -#define RTC_UIE 0x10 /* Update ended interrupt enable */ -#define RTC_SQWE 0x08 /* Square wave enable */ -#define RTC_DM 0x04 /* Date mode, 1 = binary, 0 = BCD */ -#define RTC_HM 0x02 /* hour mode, 1 = 24 hour, 0 = 12 hour */ -#define RTC_DSE 0x01 /* Daylight savings enable */ - -/* - * Register C definitions - */ -#define RTC_C 0x0c /* register C address */ -#define RTC_IRQF 0x80 /* IRQ flag */ -#define RTC_PF 0x40 /* PF flag bit */ -#define RTC_AF 0x20 /* AF flag bit */ -#define RTC_UF 0x10 /* UF flag bit */ - -/* - * Register D definitions - */ -#define RTC_D 0x0d /* register D address */ -#define RTC_VRT 0x80 /* Valid RAM and time bit */ - -#define RTC_NREG 0x0e /* number of RTC registers */ -#define RTC_NREGP 0x0a /* number of RTC registers to set time */ - -#define RTCRTIME _IOR('c', 0x01, struct rtc_st) /* Read time from RTC */ -#define RTCSTIME _IOW('c', 0x02, struct rtc_st) /* Set time into RTC */ - -struct rtc_st { - char rtc_sec; - char rtc_asec; - char rtc_min; - char rtc_amin; - char rtc_hr; - char rtc_ahr; - char rtc_dow; - char rtc_dom; - char rtc_mon; - char rtc_yr; - char rtc_statusa; - char rtc_statusb; - char rtc_statusc; - char rtc_statusd; -}; - -/* - * this macro reads contents of real time clock to specified buffer - */ 
-#define load_rtc(regs) \ -{\ - register int i; \ - \ - for (i = 0; i < RTC_NREG; i++) { \ - outb(RTC_ADDR, i); \ - (regs)[i] = inb(RTC_DATA); \ - } \ -} - -/* - * this macro writes contents of specified buffer to real time clock - */ -#define save_rtc(regs) \ -{ \ - register int i; \ - for (i = 0; i < RTC_NREGP; i++) { \ - outb(RTC_ADDR, i); \ - outb(RTC_DATA, (regs)[i]);\ - } \ -} - - diff --git a/osfmk/i386/Diagnostics.c b/osfmk/i386/Diagnostics.c new file mode 100644 index 000000000..eae3c6e66 --- /dev/null +++ b/osfmk/i386/Diagnostics.c @@ -0,0 +1,160 @@ +/* + * Copyright (c) 2005 Apple Computer, Inc. All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * The contents of this file constitute Original Code as defined in and + * are subject to the Apple Public Source License Version 1.1 (the + * "License"). You may not use this file except in compliance with the + * License. Please obtain a copy of the License at + * http://www.apple.com/publicsource and read it before using this file. + * + * This Original Code and all software distributed under the License are + * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the + * License for the specific language governing rights and limitations + * under the License. + * + * @APPLE_LICENSE_HEADER_END@ + */ +/* + * @OSF_FREE_COPYRIGHT@ + */ +/* + * @APPLE_FREE_COPYRIGHT@ + */ + +/* + * Author: Bill Angell, Apple + * Date: 10/auht-five + * + * Random diagnostics + * + * Try to keep the x86 selectors in-sync with the ppc selectors. 
+ * + */ + + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +extern uint64_t lastNapClear; + +diagWork dgWork; +uint64_t lastNapClear = 0ULL; +uint64_t lastRuptClear = 0ULL; + +typedef struct pmdata { + uint64_t pmNapDur; /* Time since last query */ + pmStats_t pmd; /* Powermanagement statistics */ +} pmdata; + + + +int +diagCall64(__unused x86_saved_state_t * regs) +{ + panic("diagCall not yet supported for 64 bit tasks\n"); +} + + +int +diagCall(x86_saved_state_t * state) +{ + + uint32_t stk, curpos, i, j; + uint32_t selector, data; + int err; + uint64_t currNap, durNap; + x86_saved_state32_t *regs; + + assert(is_saved_state32(state)); + regs = saved_state32(state); + + if (!(dgWork.dgFlags & enaDiagSCs)) + return 0; /* If not enabled, cause an exception */ + + stk = regs->uesp; /* Point to the stack */ + err = copyin((user_addr_t) (stk + 4), (char *) &selector, sizeof(uint32_t)); /* Get the selector */ + if (err) { + return 0; /* Failed to fetch stack */ + } + switch (selector) { /* Select the routine */ + + case dgRuptStat: /* Suck Interruption statistics */ + + err = copyin((user_addr_t) (stk + 8), (char *) &data, sizeof(uint32_t)); /* Get the selector */ + + if (data == 0) {/* If number of processors is 0, clear all + * counts */ + for (i = 0; i < real_ncpus; i++) { /* Cycle through + * processors */ + for (j = 0; j < 256; j++) + cpu_data_ptr[i]->cpu_hwIntCnt[j] = 0; + } + + lastRuptClear = mach_absolute_time(); /* Get the time of clear */ + return 1; /* Normal return */ + } + err = copyin((user_addr_t) (stk + 8), (char *) &data, sizeof(uint32_t)); /* Get the selector */ + + (void) copyout((char *) &real_ncpus, data, sizeof(real_ncpus)); /* Copy out number of + * processors */ + + currNap = mach_absolute_time(); /* Get the time 
now */ + durNap = currNap - lastRuptClear; /* Get the last interval + * duration */ + if (durNap == 0) + durNap = 1; /* This is a very short time, make it + * bigger */ + + curpos = data + sizeof(real_ncpus); /* Point to the next + * available spot */ + + for (i = 0; i < real_ncpus; i++) { /* Move 'em all out */ + (void) copyout((char *) &durNap, curpos, 8); /* Copy out the time + * since last clear */ + (void) copyout((char *) &cpu_data_ptr[i]->cpu_hwIntCnt, curpos + 8, 256 * sizeof(uint32_t)); /* Copy out interrupt + * data for this + * processor */ + curpos = curpos + (256 * sizeof(uint32_t) + 8); /* Point to next output + * slot */ + } + + break; + + default: /* Handle invalid ones */ + return 0; /* Return an exception */ + + } + + return 1; /* Normal non-ast check return */ +} diff --git a/osfmk/i386/Diagnostics.h b/osfmk/i386/Diagnostics.h new file mode 100644 index 000000000..bebd43f20 --- /dev/null +++ b/osfmk/i386/Diagnostics.h @@ -0,0 +1,111 @@ +/* + * Copyright (c) 2005 Apple Computer, Inc. All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * The contents of this file constitute Original Code as defined in and + * are subject to the Apple Public Source License Version 1.1 (the + * "License"). You may not use this file except in compliance with the + * License. Please obtain a copy of the License at + * http://www.apple.com/publicsource and read it before using this file. + * + * This Original Code and all software distributed under the License are + * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the + * License for the specific language governing rights and limitations + * under the License.
+ * + * @APPLE_LICENSE_HEADER_END@ + */ +/* + * @OSF_FREE_COPYRIGHT@ + */ +/* + * @APPLE_FREE_COPYRIGHT@ + */ + +/* + * Here are the Diagnostic interface interfaces + * Lovingly crafted by Bill Angell using traditional methods + * Keep selectors in sync with the PPC version where possible. + */ +#ifdef KERNEL_PRIVATE + +#ifndef _DIAGNOSTICS_H_ +#define _DIAGNOSTICS_H_ + +#ifdef __ppc__ +#error This file is not useful on PowerPC. +#endif + +int diagCall(x86_saved_state_t *regs); +int diagCall64(x86_saved_state_t *regs); + +#define diagSCnum 0x00006000 + +#define dgAdjTB 0 +#define dgLRA 1 +#define dgpcpy 2 +#define dgreset 3 +#define dgtest 4 +#define dgBMphys 5 +#define dgUnMap 6 +#define dgBootScreen 7 +#define dgFlush 8 +#define dgAlign 9 +#define dgprw 10 +#define dgmck 11 +#define dg64 12 +#define dgProbeRead 13 +#define dgCPNull 14 +#define dgPerfMon 15 +#define dgMapPage 16 +#define dgScom 17 +#define dgBind 18 +#define dgPproc 19 +#define dgAcntg 20 +#define dgKlra 21 +#define dgKfree 22 +#define dgWar 23 +#define dgNapStat 24 +#define dgRuptStat 25 + + +typedef struct diagWork { /* Diagnostic work area */ + + unsigned int dgLock; /* Lock if needed */ + unsigned int dgFlags; /* Flags */ +#define enaExpTrace 0x00000001 +#define enaExpTraceb 31 +#define enaUsrFCall 0x00000002 +#define enaUsrFCallb 30 +#define enaUsrPhyMp 0x00000004 +#define enaUsrPhyMpb 29 +#define enaDiagSCs 0x00000008 +#define enaDiagSCsb 28 +#define enaDiagDM 0x00000010 +#define enaDiagSDMb 27 +#define enaDiagEM 0x00000020 +#define enaDiagEMb 26 +#define enaDiagTrap 0x00000040 +#define enaDiagTrapb 25 +#define enaNotifyEM 0x00000080 +#define enaNotifyEMb 24 + + unsigned int dgMisc0; + unsigned int dgMisc1; + unsigned int dgMisc2; + unsigned int dgMisc3; + unsigned int dgMisc4; + unsigned int dgMisc5; + +} diagWork; + +extern diagWork dgWork; + + +#endif /* _DIAGNOSTICS_H_ */ + +#endif /* KERNEL_PRIVATE */ diff --git a/osfmk/i386/Makefile b/osfmk/i386/Makefile index ae6a4000d..2ce73da27 
100644 --- a/osfmk/i386/Makefile +++ b/osfmk/i386/Makefile @@ -9,9 +9,13 @@ include $(MakeInc_def) EXPORT_ONLY_FILES = \ apic.h \ + asm.h \ cpu_number.h \ cpu_capabilities.h \ + cpu_data.h \ cpuid.h \ + eflags.h \ + hpet.h \ io_map_entries.h \ lock.h \ locks.h \ @@ -21,13 +25,20 @@ EXPORT_ONLY_FILES = \ mp.h \ mp_desc.h \ mp_events.h \ + pmCPU.h \ + pmap.h \ proc_reg.h \ + rtclock.h \ seg.h \ simple_lock.h \ + tsc.h \ tss.h INSTALL_MD_DIR = i386 +INSTALL_MD_LIST = eflags.h \ + user_ldt.h + INSTALL_MD_LCL_LIST = cpu_capabilities.h EXPORT_MD_LIST = ${EXPORT_ONLY_FILES} diff --git a/osfmk/i386/acpi.c b/osfmk/i386/acpi.c index e748f6161..10a1b91cc 100644 --- a/osfmk/i386/acpi.c +++ b/osfmk/i386/acpi.c @@ -25,7 +25,9 @@ #include #include #include +#include #include +#include #include @@ -41,11 +43,13 @@ extern unsigned int disableSerialOuput; extern void set_kbd_leds(int leds); +extern void fpinit(void); + vm_offset_t acpi_install_wake_handler(void) { /* copy wake code to ACPI_WAKE_ADDR in low memory */ - bcopy_phys((addr64_t) kvtophys((vm_offset_t)acpi_wake_start), + bcopy_phys(kvtophys((vm_offset_t)acpi_wake_start), (addr64_t) ACPI_WAKE_ADDR, acpi_wake_end - acpi_wake_start); @@ -64,12 +68,28 @@ typedef struct acpi_hibernate_callback_data { static void acpi_hibernate(void *refcon) { - boolean_t hib; + boolean_t dohalt; acpi_hibernate_callback_data *data = (acpi_hibernate_callback_data *)refcon; if (current_cpu_datap()->cpu_hibernate) { - hib = hibernate_write_image(); + + dohalt = hibernate_write_image(); + if (dohalt) + { + // off + HIBLOG("power off\n"); + if (PE_halt_restart) + (*PE_halt_restart)(kPEHaltCPU); + } + else + { + // sleep + HIBLOG("sleep\n"); + + // should we come back via regular wake, set the state in memory. 
+ cpu_datap(0)->cpu_hibernate = 0; + } } (data->func)(data->refcon); @@ -83,51 +103,82 @@ acpi_sleep_kernel(acpi_sleep_callback func, void *refcon) acpi_hibernate_callback_data data; boolean_t did_hibernate; - /* shutdown local APIC before passing control to BIOS */ - lapic_shutdown(); + kprintf("acpi_sleep_kernel hib=%d\n", current_cpu_datap()->cpu_hibernate); + + /* shutdown local APIC before passing control to BIOS */ + lapic_shutdown(); data.func = func; data.refcon = refcon; - /* - * Save master CPU state and sleep platform. - * Will not return until platform is woken up, - * or if sleep failed. - */ + /* Save HPET state */ + hpet_save(); + + /* + * If we're in 64-bit mode, drop back into legacy mode during sleep. + */ + if (cpu_mode_is64bit()) { + cpu_IA32e_disable(current_cpu_datap()); + kprintf("acpi_sleep_kernel legacy mode re-entered\n"); + } + + /* + * Save master CPU state and sleep platform. + * Will not return until platform is woken up, + * or if sleep failed. + */ acpi_sleep_cpu(acpi_hibernate, &data); - /* reset UART if kprintf is enabled */ - if (FALSE == disableSerialOuput) - serial_init(); + /* reset UART if kprintf is enabled */ + if (FALSE == disableSerialOuput) + serial_init(); + + kprintf("ret from acpi_sleep_cpu hib=%d\n", current_cpu_datap()->cpu_hibernate); if (current_cpu_datap()->cpu_hibernate) { - * (int *) CM1 = 0; - * (int *) CM2 = 0; - * (int *) CM3 = 0; + int i; + for (i=0; icpu_pmap->mapwindow[i].prv_CMAP = 0; + } + current_cpu_datap()->cpu_hibernate = 0; + did_hibernate = TRUE; - current_cpu_datap()->cpu_hibernate = 0; + } else { + did_hibernate = FALSE; + } - did_hibernate = TRUE; + /* Re-enable 64-bit mode if necessary. 
*/ + if (cpu_mode_is64bit()) { + cpu_IA32e_enable(current_cpu_datap()); + cpu_desc_load64(current_cpu_datap()); + kprintf("acpi_sleep_kernel 64-bit mode re-enabled\n"); + fast_syscall_init64(); } else { - did_hibernate = FALSE; + fast_syscall_init(); } - /* restore MTRR settings */ - mtrr_update_cpu(); + /* restore MTRR settings */ + mtrr_update_cpu(); - /* set up PAT following boot processor power up */ - pat_init(); + /* set up PAT following boot processor power up */ + pat_init(); if (did_hibernate) { hibernate_machine_init(); } - /* re-enable and re-init local apic */ - if (lapic_probe()) - lapic_init(); + /* re-enable and re-init local apic */ + if (lapic_probe()) + lapic_init(); + + /* Restore HPET state */ + hpet_restore(); + + /* let the realtime clock reset */ + rtc_sleep_wakeup(); - /* let the realtime clock reset */ - rtc_sleep_wakeup(); + fpinit(); + clear_fpu(); if (did_hibernate) { enable_preemption(); diff --git a/osfmk/i386/acpi_wakeup.s b/osfmk/i386/acpi_wakeup.s index 931a72740..9dc633a76 100644 --- a/osfmk/i386/acpi_wakeup.s +++ b/osfmk/i386/acpi_wakeup.s @@ -36,7 +36,7 @@ .long address - EXT(acpi_wake_start) ;\ .word segment -#define PA(addr) ((addr)-KERNELBASE) +#define PA(addr) (addr) /* * acpi_wake_start @@ -145,6 +145,10 @@ ENTRY(acpi_sleep_cpu) pushal movl %esp, saved_esp + /* make sure tlb is flushed */ + movl %cr3,%eax + movl %eax,%cr3 + /* save control registers */ movl %cr0, %eax movl %eax, saved_cr0 @@ -186,43 +190,27 @@ ENTRY(acpi_sleep_cpu) jmp wake_restore wake_prot: - /* protected mode, paging disabled */ POSTCODE(ACPI_WAKE_PROT_ENTRY) + movl PA(saved_cr3), %ebx + movl PA(saved_cr4), %ecx + /* + * restore cr3, PAE and NXE states in an orderly fashion + */ + movl %ebx, %cr3 + movl %ecx, %cr4 + + movl $(MSR_IA32_EFER), %ecx /* MSR number in ecx */ + rdmsr /* MSR value return in edx: eax */ + orl $(MSR_IA32_EFER_NXE), %eax /* Set NXE bit in low 32-bits */ + wrmsr /* Update Extended Feature Enable reg */ + /* restore kernel GDT */ lgdt 
PA(saved_gdt) - /* restore control registers */ movl PA(saved_cr2), %eax movl %eax, %cr2 - -#ifdef PAE - movl PA(EXT(IdlePDPT)), %eax - movl (%eax), %esi /* save orig */ - movl 24(%eax), %ebx - movl %ebx, (%eax) /* identity map low mem */ - movl %eax, %cr3 - - movl PA(saved_cr4), %eax - movl %eax, %cr4 -#else - movl PA(saved_cr4), %eax - movl %eax, %cr4 - - /* - * Temporarily use the page tables at IdlePTD - * to enable paging. Copy the KPTDI entry to - * entry 0 in the PTD to identity map the kernel. - */ - movl PA(EXT(IdlePTD)), %eax - movl %eax, %ebx - addl $(KPTDI << PTEINDX), %ebx /* bytes per PDE */ - movl (%ebx), %ebx /* IdlePTD[KPTDI] */ - movl (%eax), %esi /* save original IdlePTD[0] */ - movl %ebx, (%eax) /* update IdlePTD[0] */ - movl %eax, %cr3 /* CR3 = IdlePTD */ -#endif /* restore CR0, paging enabled */ movl PA(saved_cr0), %eax @@ -240,20 +228,6 @@ wake_paged: movw $(KERNEL_DS), %ax movw %ax, %ds - /* undo changes to IdlePTD */ -#ifdef PAE - movl EXT(IdlePDPT), %eax -#else - movl EXT(IdlePTD), %eax -#endif - addl $(KERNELBASE), %eax /* make virtual */ - movl %esi, (%eax) - - /* restore real PDE base */ - movl saved_cr3, %eax - movl %eax, %cr3 - - /* restore local and interrupt descriptor tables */ lldt saved_ldt lidt saved_idt @@ -293,29 +267,36 @@ wake_restore: .globl EXT(acpi_wake_prot_entry) ENTRY(acpi_wake_prot_entry) /* protected mode, paging enabled */ + POSTCODE(ACPI_WAKE_PAGED_ENTRY) /* restore kernel GDT */ - lgdt PA(saved_gdt) - + lgdt saved_gdt + POSTCODE(0x40) + /* restore control registers */ + + movl saved_cr0, %eax + movl %eax, %cr0 + movl saved_cr2, %eax movl %eax, %cr2 POSTCODE(0x3E) - /* switch to kernel data segment */ - movw $(KERNEL_DS), %ax - movw %ax, %ds - POSTCODE(0x3D) /* restore real PDE base */ movl saved_cr3, %eax movl saved_cr4, %edx movl %eax, %cr3 movl %edx, %cr4 + movl %eax, %cr3 - POSTCODE(0x3C) + /* switch to kernel data segment */ + movw $(KERNEL_DS), %ax + movw %ax, %ds + + POSTCODE(0x3C) /* restore local and 
interrupt descriptor tables */ lldt saved_ldt lidt saved_idt @@ -362,6 +343,7 @@ ENTRY(acpi_wake_prot_entry) /* * CPU registers saved across sleep/wake. */ + saved_esp: .long 0 saved_es: .word 0 saved_fs: .word 0 diff --git a/osfmk/i386/asm.h b/osfmk/i386/asm.h index fb714c558..fcaffc055 100644 --- a/osfmk/i386/asm.h +++ b/osfmk/i386/asm.h @@ -72,6 +72,7 @@ #define S_ARG1 8(%esp) #define S_ARG2 12(%esp) #define S_ARG3 16(%esp) +#define S_ARG4 20(%esp) #define FRAME pushl %ebp; movl %esp, %ebp #define EMARF leave @@ -85,7 +86,7 @@ /* There is another definition of ALIGN for .c sources */ #ifdef ASSEMBLER -#define ALIGN 2,0x90 +#define ALIGN 4,0x90 #endif /* ASSEMBLER */ #ifndef FALIGN diff --git a/osfmk/i386/asm64.h b/osfmk/i386/asm64.h new file mode 100644 index 000000000..a47fcf369 --- /dev/null +++ b/osfmk/i386/asm64.h @@ -0,0 +1,56 @@ +/* + * Copyright (c) 2006 Apple Computer, Inc. All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * The contents of this file constitute Original Code as defined in and + * are subject to the Apple Public Source License Version 1.1 (the + * "License"). You may not use this file except in compliance with the + * License. Please obtain a copy of the License at + * http://www.apple.com/publicsource and read it before using this file. + * + * This Original Code and all software distributed under the License are + * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the + * License for the specific language governing rights and limitations + * under the License. + * + * @APPLE_LICENSE_HEADER_END@ + */ + +/* Helper macros for 64-bit mode switching */ + +/* + * Long jump to 64-bit space from 32-bit compatibility mode. 
+ */ +#define ENTER_64BIT_MODE() \ + .code32 ;\ + .byte 0xea /* far jump longmode */ ;\ + .long 1f ;\ + .word KERNEL64_CS ;\ + .code64 ;\ +1: + +/* + * Here in long mode but still running below 4G. + * "Near" jump into uber-space. + */ +#define ENTER_UBERSPACE() \ + mov 2f,%rax ;\ + jmp *%rax ;\ +2: .long 3f ;\ + .long KERNEL_UBER_BASE_HI32 ;\ +3: + +/* + * Long jump to 32-bit compatibility mode from 64-bit space. + */ +#define ENTER_COMPAT_MODE() \ + ljmp *4f ;\ +4: .long 5f ;\ + .word KERNEL_CS ;\ + .code32 ;\ +5: + diff --git a/osfmk/i386/ast.h b/osfmk/i386/ast.h index 9f201def5..46f5db9fe 100644 --- a/osfmk/i386/ast.h +++ b/osfmk/i386/ast.h @@ -56,13 +56,6 @@ /* * Machine-dependent AST file for machines with no hardware AST support. * - * For the I386, we define AST_I386_FP to handle delayed - * floating-point exceptions. The FPU may interrupt on errors - * while the user is not running (in kernel or other thread running). */ -#define AST_I386_FP 0x80000000 -#define MACHINE_AST_PER_THREAD AST_I386_FP - - #endif /* _I386_AST_H_ */ diff --git a/osfmk/i386/bsd_i386.c b/osfmk/i386/bsd_i386.c index 759d96259..c3995ff1d 100644 --- a/osfmk/i386/bsd_i386.c +++ b/osfmk/i386/bsd_i386.c @@ -59,12 +59,16 @@ #include #include #include +#include +#include #include #include #include +#include #include <../bsd/sys/sysent.h> extern struct proc *current_proc(void); +extern struct proc * kernproc; kern_return_t thread_userstack( @@ -73,7 +77,7 @@ thread_userstack( thread_state_t, unsigned int, mach_vm_offset_t *, - int * + int * ); kern_return_t @@ -85,6 +89,8 @@ thread_entrypoint( mach_vm_offset_t * ); +void * find_user_regs(thread_t); + unsigned int get_msr_exportmask(void); unsigned int get_msr_nbits(void); @@ -104,110 +110,99 @@ void IOSleep(int); */ kern_return_t thread_userstack( - __unused thread_t thread, - int flavor, - thread_state_t tstate, - unsigned int count, - user_addr_t *user_stack, - int *customstack -) + __unused thread_t thread, + int flavor, + thread_state_t 
tstate, + __unused unsigned int count, + user_addr_t *user_stack, + int *customstack + ) { - struct i386_saved_state *state; - i386_thread_state_t *state25; - vm_offset_t uesp; - if (customstack) - *customstack = 0; + *customstack = 0; switch (flavor) { - case i386_THREAD_STATE: /* FIXME */ - state25 = (i386_thread_state_t *) tstate; + case OLD_i386_THREAD_STATE: + case x86_THREAD_STATE32: + { + x86_thread_state32_t *state25; + + state25 = (x86_thread_state32_t *) tstate; + if (state25->esp) *user_stack = state25->esp; else - *user_stack = USRSTACK; + *user_stack = VM_USRSTACK32; if (customstack && state25->esp) *customstack = 1; else *customstack = 0; break; + } - case i386_NEW_THREAD_STATE: - if (count < i386_NEW_THREAD_STATE_COUNT) - return (KERN_INVALID_ARGUMENT); - else { - state = (struct i386_saved_state *) tstate; - uesp = state->uesp; - } + case x86_THREAD_STATE64: + { + x86_thread_state64_t *state25; + + state25 = (x86_thread_state64_t *) tstate; - /* If a valid user stack is specified, use it. */ - if (uesp) - *user_stack = uesp; + if (state25->rsp) + *user_stack = state25->rsp; else - *user_stack = USRSTACK; - if (customstack && uesp) + *user_stack = VM_USRSTACK64; + if (customstack && state25->rsp) *customstack = 1; else *customstack = 0; - break; - default : + break; + } + + default : return (KERN_INVALID_ARGUMENT); } return (KERN_SUCCESS); } + kern_return_t thread_entrypoint( - __unused thread_t thread, - int flavor, - thread_state_t tstate, - unsigned int count, - mach_vm_offset_t *entry_point -) + __unused thread_t thread, + int flavor, + thread_state_t tstate, + __unused unsigned int count, + mach_vm_offset_t *entry_point + ) { - struct i386_saved_state *state; - i386_thread_state_t *state25; - - /* - * Set a default. - */ - if (*entry_point == 0) - *entry_point = VM_MIN_ADDRESS; + /* + * Set a default. 
+ */ + if (*entry_point == 0) + *entry_point = VM_MIN_ADDRESS; - switch (flavor) { - case i386_THREAD_STATE: - state25 = (i386_thread_state_t *) tstate; - *entry_point = state25->eip ? state25->eip: VM_MIN_ADDRESS; - break; - - case i386_NEW_THREAD_STATE: - if (count < i386_THREAD_STATE_COUNT) - return (KERN_INVALID_ARGUMENT); - else { - state = (struct i386_saved_state *) tstate; - - /* - * If a valid entry point is specified, use it. - */ - *entry_point = state->eip ? state->eip: VM_MIN_ADDRESS; - } - break; - } + switch (flavor) { + case OLD_i386_THREAD_STATE: + case x86_THREAD_STATE32: + { + x86_thread_state32_t *state25; + + state25 = (x86_thread_state32_t *) tstate; + *entry_point = state25->eip ? state25->eip: VM_MIN_ADDRESS; + break; + } + case x86_THREAD_STATE64: + { + x86_thread_state64_t *state25; + + state25 = (x86_thread_state64_t *) tstate; + *entry_point = state25->rip ? state25->rip: VM_MIN_ADDRESS64; + break; + } + } return (KERN_SUCCESS); } -struct i386_saved_state * -get_user_regs(thread_t th) -{ - if (th->machine.pcb) - return(USER_REGS(th)); - else { - printf("[get_user_regs: thread does not have pcb]"); - return NULL; - } -} /* * Duplicate parent state in child @@ -219,43 +214,37 @@ machine_thread_dup( thread_t child ) { - struct i386_float_state floatregs; - -#ifdef XXX - /* Save the FPU state */ - if ((pcb_t)(per_proc_info[cpu_number()].fpu_pcb) == parent->machine.pcb) { - fp_state_save(parent); - } -#endif - if (child->machine.pcb == NULL || parent->machine.pcb == NULL) return (KERN_FAILURE); + /* + * Copy over the i386_saved_state registers + */ + if (cpu_mode_is64bit()) { + if (thread_is_64bit(parent)) + bcopy(USER_REGS64(parent), USER_REGS64(child), sizeof(x86_saved_state64_t)); + else + bcopy(USER_REGS32(parent), USER_REGS32(child), sizeof(x86_saved_state_compat32_t)); + } else + bcopy(USER_REGS32(parent), USER_REGS32(child), sizeof(x86_saved_state32_t)); - /* Copy over the i386_saved_state registers */ - child->machine.pcb->iss = 
parent->machine.pcb->iss; - - /* Check to see if parent is using floating point + /* + * Check to see if parent is using floating point * and if so, copy the registers to the child - * FIXME - make sure this works. */ + fpu_dup_fxstate(parent, child); - if (parent->machine.pcb->ims.ifps) { - if (fpu_get_state(parent, &floatregs) == KERN_SUCCESS) - fpu_set_state(child, &floatregs); - } - - /* FIXME - should a user specified LDT, TSS and V86 info + /* + * FIXME - should a user specified LDT, TSS and V86 info * be duplicated as well?? - probably not. */ // duplicate any use LDT entry that was set I think this is appropriate. #ifdef MACH_BSD if (parent->machine.pcb->uldt_selector!= 0) { - child->machine.pcb->uldt_selector = parent->machine.pcb->uldt_selector; - child->machine.pcb->uldt_desc = parent->machine.pcb->uldt_desc; + child->machine.pcb->uldt_selector = parent->machine.pcb->uldt_selector; + child->machine.pcb->uldt_desc = parent->machine.pcb->uldt_desc; } #endif - return (KERN_SUCCESS); } @@ -267,17 +256,49 @@ void thread_set_child(thread_t child, int pid); void thread_set_child(thread_t child, int pid) { - child->machine.pcb->iss.eax = pid; - child->machine.pcb->iss.edx = 1; - child->machine.pcb->iss.efl &= ~EFL_CF; + + if (thread_is_64bit(child)) { + x86_saved_state64_t *iss64; + + iss64 = USER_REGS64(child); + + iss64->rax = pid; + iss64->rdx = 1; + iss64->isf.rflags &= ~EFL_CF; + } else { + x86_saved_state32_t *iss32; + + iss32 = USER_REGS32(child); + + iss32->eax = pid; + iss32->edx = 1; + iss32->efl &= ~EFL_CF; + } } + + void thread_set_parent(thread_t parent, int pid); void thread_set_parent(thread_t parent, int pid) { - parent->machine.pcb->iss.eax = pid; - parent->machine.pcb->iss.edx = 0; - parent->machine.pcb->iss.efl &= ~EFL_CF; + + if (thread_is_64bit(parent)) { + x86_saved_state64_t *iss64; + + iss64 = USER_REGS64(parent); + + iss64->rax = pid; + iss64->rdx = 0; + iss64->isf.rflags &= ~EFL_CF; + } else { + x86_saved_state32_t *iss32; + + iss32 = 
USER_REGS32(parent); + + iss32->eax = pid; + iss32->edx = 0; + iss32->efl &= ~EFL_CF; + } } @@ -286,24 +307,10 @@ thread_set_parent(thread_t parent, int pid) * System Call handling code */ -#define ERESTART -1 /* restart syscall */ -#define EJUSTRETURN -2 /* don't modify regs, just return */ - - -#define NO_FUNNEL 0 -#define KERNEL_FUNNEL 1 - -extern funnel_t * kernel_flock; - -extern int set_bsduthreadargs (thread_t, struct i386_saved_state *, void *); -extern void * get_bsduthreadarg(thread_t); -extern int * get_bsduthreadrval(thread_t th); -extern int * get_bsduthreadlowpridelay(thread_t th); +extern struct proc * i386_current_proc(void); extern long fuword(vm_offset_t); -extern void unix_syscall(struct i386_saved_state *); -extern void unix_syscall_return(int); /* following implemented in bsd/dev/i386/unix_signal.c */ int __pthread_cset(struct sysent *); @@ -312,243 +319,117 @@ void __pthread_creset(struct sysent *); void -unix_syscall_return(int error) +machdep_syscall(x86_saved_state_t *state) { - thread_t thread; - volatile int *rval; - struct i386_saved_state *regs; - struct proc *p; - unsigned short code; - vm_offset_t params; - struct sysent *callp; - volatile int *lowpri_delay; - - thread = current_thread(); - rval = get_bsduthreadrval(thread); - lowpri_delay = get_bsduthreadlowpridelay(thread); - p = current_proc(); - - regs = USER_REGS(thread); - - /* reconstruct code for tracing before blasting eax */ - code = regs->eax; - params = (vm_offset_t) ((caddr_t)regs->uesp + sizeof (int)); - callp = (code >= nsysent) ? 
&sysent[63] : &sysent[code]; - if (callp == sysent) { - code = fuword(params); - } - - if (error == ERESTART) { - regs->eip -= 7; - } - else if (error != EJUSTRETURN) { - if (error) { - regs->eax = error; - regs->efl |= EFL_CF; /* carry bit */ - } else { /* (not error) */ - regs->eax = rval[0]; - regs->edx = rval[1]; - regs->efl &= ~EFL_CF; - } - } - - ktrsysret(p, code, error, rval[0], (callp->sy_funnel & FUNNEL_MASK)); - - __pthread_creset(callp); - - if ((callp->sy_funnel & FUNNEL_MASK) != NO_FUNNEL) - (void) thread_funnel_set(current_thread()->funnel_lock, FALSE); - - if (*lowpri_delay) { - /* - * task is marked as a low priority I/O type - * and the I/O we issued while in this system call - * collided with normal I/O operations... we'll - * delay in order to mitigate the impact of this - * task on the normal operation of the system - */ - IOSleep(*lowpri_delay); - *lowpri_delay = 0; - } - KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_END, - error, rval[0], rval[1], 0, 0); - - thread_exception_return(); - /* NOTREACHED */ -} - - -void -unix_syscall(struct i386_saved_state *regs) -{ - thread_t thread; - void *vt; - unsigned short code; - struct sysent *callp; - int nargs; - int error; - int *rval; - int funnel_type; - vm_offset_t params; - struct proc *p; - volatile int *lowpri_delay; - - thread = current_thread(); - p = current_proc(); - rval = get_bsduthreadrval(thread); - lowpri_delay = get_bsduthreadlowpridelay(thread); - - thread->task->syscalls_unix++; /* MP-safety ignored */ - - //printf("[scall : eax %x]", regs->eax); - code = regs->eax; - params = (vm_offset_t) ((caddr_t)regs->uesp + sizeof (int)); - callp = (code >= nsysent) ? &sysent[63] : &sysent[code]; - if (callp == sysent) { - code = fuword(params); - params += sizeof (int); - callp = (code >= nsysent) ? 
&sysent[63] : &sysent[code]; - } + int args[machdep_call_count]; + int trapno; + int nargs; + machdep_call_t *entry; + x86_saved_state32_t *regs; + + assert(is_saved_state32(state)); + regs = saved_state32(state); - vt = get_bsduthreadarg(thread); + trapno = regs->eax; +#if DEBUG_TRACE + kprintf("machdep_syscall(0x%08x) code=%d\n", regs, trapno); +#endif - if ((nargs = (callp->sy_narg * sizeof (int))) && - (error = copyin((user_addr_t) params, (char *) vt, nargs)) != 0) { - regs->eax = error; - regs->efl |= EFL_CF; - thread_exception_return(); - /* NOTREACHED */ - } - - rval[0] = 0; - rval[1] = regs->edx; + if (trapno < 0 || trapno >= machdep_call_count) { + regs->eax = (unsigned int)kern_invalid(NULL); - if ((error = __pthread_cset(callp))) { - /* cancelled system call; let it returned with EINTR for handling */ - regs->eax = error; - regs->efl |= EFL_CF; thread_exception_return(); /* NOTREACHED */ } + entry = &machdep_call_table[trapno]; + nargs = entry->nargs; - funnel_type = (callp->sy_funnel & FUNNEL_MASK); - if(funnel_type == KERNEL_FUNNEL) - (void) thread_funnel_set(kernel_flock, TRUE); - - (void) set_bsduthreadargs(thread, regs, NULL); - - if (callp->sy_narg > 8) - panic("unix_syscall max arg count exceeded (%d)", callp->sy_narg); - - ktrsyscall(p, code, callp->sy_narg, vt, funnel_type); - - { - int *ip = (int *)vt; - KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_START, - *ip, *(ip+1), *(ip+2), *(ip+3), 0); - } + if (nargs != 0) { + if (copyin((user_addr_t) regs->uesp + sizeof (int), + (char *) args, (nargs * sizeof (int)))) { + regs->eax = KERN_INVALID_ADDRESS; - error = (*(callp->sy_call))((void *) p, (void *) vt, &rval[0]); - -#if 0 - /* May be needed with vfork changes */ - regs = USER_REGS(thread); -#endif - if (error == ERESTART) { - regs->eip -= 7; + thread_exception_return(); + /* NOTREACHED */ + } } - else if (error != EJUSTRETURN) { - if (error) { - regs->eax = error; - regs->efl |= EFL_CF; /* carry bit */ - } else { /* (not 
error) */ - regs->eax = rval[0]; - regs->edx = rval[1]; - regs->efl &= ~EFL_CF; - } - } - - ktrsysret(p, code, error, rval[0], funnel_type); - - __pthread_creset(callp); - - if(funnel_type != NO_FUNNEL) - (void) thread_funnel_set(current_thread()->funnel_lock, FALSE); + switch (nargs) { + case 0: + regs->eax = (*entry->routine.args_0)(); + break; + case 1: + regs->eax = (*entry->routine.args_1)(args[0]); + break; + case 2: + regs->eax = (*entry->routine.args_2)(args[0], args[1]); + break; + case 3: + if (!entry->bsd_style) + regs->eax = (*entry->routine.args_3)(args[0], args[1], args[2]); + else { + int error; + int rval; + + error = (*entry->routine.args_bsd_3)(&rval, args[0], args[1], args[2]); + if (error) { + regs->eax = error; + regs->efl |= EFL_CF; /* carry bit */ + } else { + regs->eax = rval; + regs->efl &= ~EFL_CF; + } + } + break; + case 4: + regs->eax = (*entry->routine.args_4)(args[0], args[1], args[2], args[3]); + break; - if (*lowpri_delay) { - /* - * task is marked as a low priority I/O type - * and the I/O we issued while in this system call - * collided with normal I/O operations... 
we'll - * delay in order to mitigate the impact of this - * task on the normal operation of the system - */ - IOSleep(*lowpri_delay); - *lowpri_delay = 0; + default: + panic("machdep_syscall: too many args"); } - KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_END, - error, rval[0], rval[1], 0, 0); + if (current_thread()->funnel_lock) + (void) thread_funnel_set(current_thread()->funnel_lock, FALSE); - thread_exception_return(); - /* NOTREACHED */ + thread_exception_return(); + /* NOTREACHED */ } void -machdep_syscall( struct i386_saved_state *regs) +machdep_syscall64(x86_saved_state_t *state) { - int trapno, nargs; - machdep_call_t *entry; - - trapno = regs->eax; - if (trapno < 0 || trapno >= machdep_call_count) { - regs->eax = (unsigned int)kern_invalid(NULL); + int trapno; + machdep_call_t *entry; + x86_saved_state64_t *regs; - thread_exception_return(); - /* NOTREACHED */ - } + assert(is_saved_state64(state)); + regs = saved_state64(state); - entry = &machdep_call_table[trapno]; - nargs = entry->nargs; - - if (nargs > 0) { - int args[nargs]; - - if (copyin((user_addr_t) regs->uesp + sizeof (int), - (char *) args, - nargs * sizeof (int))) { + trapno = regs->rax & SYSCALL_NUMBER_MASK; - regs->eax = KERN_INVALID_ADDRESS; + if (trapno < 0 || trapno >= machdep_call_count) { + regs->rax = (unsigned int)kern_invalid(NULL); - thread_exception_return(); - /* NOTREACHED */ + thread_exception_return(); + /* NOTREACHED */ } + entry = &machdep_call_table64[trapno]; - switch (nargs) { - case 1: - regs->eax = (*entry->routine.args_1)(args[0]); - break; - case 2: - regs->eax = (*entry->routine.args_2)(args[0],args[1]); + switch (entry->nargs) { + case 0: + regs->rax = (*entry->routine.args_0)(); break; - case 3: - regs->eax = (*entry->routine.args_3)(args[0],args[1],args[2]); - break; - case 4: - regs->eax = (*entry->routine.args_4)(args[0],args[1],args[2],args[3]); + case 1: + regs->rax = (*entry->routine.args64_1)(regs->rdi); break; default: - 
panic("machdep_syscall(): too many args"); + panic("machdep_syscall64: too many args"); } - } - else - regs->eax = (*entry->routine.args_0)(); - - if (current_thread()->funnel_lock) - (void) thread_funnel_set(current_thread()->funnel_lock, FALSE); + if (current_thread()->funnel_lock) + (void) thread_funnel_set(current_thread()->funnel_lock, FALSE); - thread_exception_return(); - /* NOTREACHED */ + thread_exception_return(); + /* NOTREACHED */ } @@ -577,7 +458,7 @@ thread_compose_cthread_desc(unsigned int addr, pcb_t pcb) kern_return_t thread_set_cthread_self(uint32_t self) { - current_thread()->machine.pcb->cthread_self = self; + current_thread()->machine.pcb->cthread_self = (uint64_t) self; return (KERN_SUCCESS); } @@ -591,11 +472,38 @@ thread_get_cthread_self(void) kern_return_t thread_fast_set_cthread_self(uint32_t self) { - pcb_t pcb; - pcb = (pcb_t)current_thread()->machine.pcb; - thread_compose_cthread_desc(self, pcb); - pcb->cthread_self = self; /* preserve old func too */ - return (USER_CTHREAD); + pcb_t pcb; + x86_saved_state32_t *iss; + + pcb = (pcb_t)current_thread()->machine.pcb; + thread_compose_cthread_desc(self, pcb); + pcb->cthread_self = (uint64_t) self; /* preserve old func too */ + iss = saved_state32(pcb->iss); + iss->gs = USER_CTHREAD; + + return (USER_CTHREAD); +} + +kern_return_t +thread_fast_set_cthread_self64(uint64_t self) +{ + pcb_t pcb; + x86_saved_state64_t *iss; + + pcb = current_thread()->machine.pcb; + + /* check for canonical address, set 0 otherwise */ + if (!IS_USERADDR64_CANONICAL(self)) + self = 0ULL; + pcb->cthread_self = self; + current_cpu_datap()->cpu_uber.cu_user_gs_base = self; + + /* XXX for 64-in-32 */ + iss = saved_state64(pcb->iss); + iss->gs = USER_CTHREAD; + thread_compose_cthread_desc((uint32_t) self, pcb); + + return (USER_CTHREAD); } /* @@ -665,137 +573,177 @@ thread_set_user_ldt(uint32_t address, uint32_t size, uint32_t flags) return USER_SETTABLE; } -void -mach25_syscall(struct i386_saved_state *regs) -{ - 
printf("*** Atttempt to execute a Mach 2.5 system call at EIP=%x EAX=%x(%d)\n", - regs->eip, regs->eax, -regs->eax); - panic("FIXME!"); -} -#endif /* MACH_BSD */ - -/* This routine is called from assembly before each and every mach trap. - */ +#endif /* MACH_BSD */ -extern unsigned int mach_call_start(unsigned int, unsigned int *); -__private_extern__ -unsigned int -mach_call_start(unsigned int call_number, unsigned int *args) -{ - int i, argc; - unsigned int kdarg[3]; +typedef kern_return_t (*mach_call_t)(void *); - current_thread()->task->syscalls_mach++; /* MP-safety ignored */ +struct mach_call_args { + syscall_arg_t arg1; + syscall_arg_t arg2; + syscall_arg_t arg3; + syscall_arg_t arg4; + syscall_arg_t arg5; + syscall_arg_t arg6; + syscall_arg_t arg7; + syscall_arg_t arg8; + syscall_arg_t arg9; +}; -/* Always prepare to trace mach system calls */ - kdarg[0]=0; - kdarg[1]=0; - kdarg[2]=0; +static kern_return_t +mach_call_arg_munger32(uint32_t sp, int nargs, int call_number, struct mach_call_args *args); - argc = mach_trap_table[call_number>>4].mach_trap_arg_count; - - if (argc > 3) - argc = 3; - - for (i=0; i < argc; i++) - kdarg[i] = (int)*(args + i); - - KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_EXCP_SC, (call_number>>4)) | DBG_FUNC_START, - kdarg[0], kdarg[1], kdarg[2], 0, 0); - return call_number; /* pass this back thru */ -} +static kern_return_t +mach_call_arg_munger32(uint32_t sp, int nargs, int call_number, struct mach_call_args *args) +{ + unsigned int args32[9]; -/* This routine is called from assembly after each mach system call - */ + if (copyin((user_addr_t)(sp + sizeof(int)), (char *)args32, nargs * sizeof (int))) + return KERN_INVALID_ARGUMENT; -extern unsigned int mach_call_end(unsigned int, unsigned int); + switch (nargs) { + case 9: args->arg9 = args32[8]; + case 8: args->arg8 = args32[7]; + case 7: args->arg7 = args32[6]; + case 6: args->arg6 = args32[5]; + case 5: args->arg5 = args32[4]; + case 4: args->arg4 = args32[3]; + case 3: 
args->arg3 = args32[2]; + case 2: args->arg2 = args32[1]; + case 1: args->arg1 = args32[0]; + } + if (call_number == 90) { + /* munge_l for mach_wait_until_trap() */ + args->arg1 = (((uint64_t)(args32[0])) | ((((uint64_t)(args32[1]))<<32))); + } + if (call_number == 93) { + /* munge_wl for mk_timer_arm_trap() */ + args->arg2 = (((uint64_t)(args32[1])) | ((((uint64_t)(args32[2]))<<32))); + } -__private_extern__ -unsigned int -mach_call_end(unsigned int call_number, unsigned int retval) -{ - KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_EXCP_SC,(call_number>>4)) | DBG_FUNC_END, - retval, 0, 0, 0, 0); - return retval; /* pass this back thru */ + return KERN_SUCCESS; } -typedef kern_return_t (*mach_call_t)(void *); -extern __attribute__((regparm(1))) kern_return_t -mach_call_munger(unsigned int call_number, - unsigned int arg1, - unsigned int arg2, - unsigned int arg3, - unsigned int arg4, - unsigned int arg5, - unsigned int arg6, - unsigned int arg7, - unsigned int arg8, - unsigned int arg9 -); +__private_extern__ void +mach_call_munger(x86_saved_state_t *state); + -struct mach_call_args { - unsigned int arg1; - unsigned int arg2; - unsigned int arg3; - unsigned int arg4; - unsigned int arg5; - unsigned int arg6; - unsigned int arg7; - unsigned int arg8; - unsigned int arg9; -}; __private_extern__ -__attribute__((regparm(1))) kern_return_t -mach_call_munger(unsigned int call_number, - unsigned int arg1, - unsigned int arg2, - unsigned int arg3, - unsigned int arg4, - unsigned int arg5, - unsigned int arg6, - unsigned int arg7, - unsigned int arg8, - unsigned int arg9 -) +void +mach_call_munger(x86_saved_state_t *state) { int argc; + int call_number; mach_call_t mach_call; kern_return_t retval; struct mach_call_args args = { 0, 0, 0, 0, 0, 0, 0, 0, 0 }; - - current_thread()->task->syscalls_mach++; /* MP-safety ignored */ - call_number >>= 4; + x86_saved_state32_t *regs; + + assert(is_saved_state32(state)); + regs = saved_state32(state); + + call_number = -(regs->eax); 
+#if DEBUG_TRACE + kprintf("mach_call_munger(0x%08x) code=%d\n", regs, call_number); +#endif + + if (call_number < 0 || call_number >= mach_trap_count) { + i386_exception(EXC_SYSCALL, call_number, 1); + /* NOTREACHED */ + } + mach_call = (mach_call_t)mach_trap_table[call_number].mach_trap_function; + if (mach_call == (mach_call_t)kern_invalid) { + i386_exception(EXC_SYSCALL, call_number, 1); + /* NOTREACHED */ + } argc = mach_trap_table[call_number].mach_trap_arg_count; - switch (argc) { - case 9: args.arg9 = arg9; - case 8: args.arg8 = arg8; - case 7: args.arg7 = arg7; - case 6: args.arg6 = arg6; - case 5: args.arg5 = arg5; - case 4: args.arg4 = arg4; - case 3: args.arg3 = arg3; - case 2: args.arg2 = arg2; - case 1: args.arg1 = arg1; + + if (argc) { + retval = mach_call_arg_munger32(regs->uesp, argc, call_number, &args); + + if (retval != KERN_SUCCESS) { + regs->eax = retval; + + thread_exception_return(); + /* NOTREACHED */ + } } + KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_EXCP_SC, (call_number)) | DBG_FUNC_START, + (int) args.arg1, (int) args.arg2, (int) args.arg3, (int) args.arg4, 0); + retval = mach_call(&args); + + KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_EXCP_SC,(call_number)) | DBG_FUNC_END, + retval, 0, 0, 0, 0); + regs->eax = retval; + + thread_exception_return(); + /* NOTREACHED */ +} + + + +__private_extern__ void +mach_call_munger64(x86_saved_state_t *state); + + +__private_extern__ +void +mach_call_munger64(x86_saved_state_t *state) +{ + int call_number; + int argc; + mach_call_t mach_call; + x86_saved_state64_t *regs; + + assert(is_saved_state64(state)); + regs = saved_state64(state); + + call_number = regs->rax & SYSCALL_NUMBER_MASK; + KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_EXCP_SC, (call_number)) | DBG_FUNC_START, - args.arg1, args.arg2, args.arg3, 0, 0); + (int) regs->rdi, (int) regs->rsi, (int) regs->rdx, (int) regs->r10, 0); + if (call_number < 0 || call_number >= mach_trap_count) { + i386_exception(EXC_SYSCALL, regs->rax, 1); + /* 
NOTREACHED */ + } mach_call = (mach_call_t)mach_trap_table[call_number].mach_trap_function; - retval = mach_call(&args); + if (mach_call == (mach_call_t)kern_invalid) { + i386_exception(EXC_SYSCALL, regs->rax, 1); + /* NOTREACHED */ + } + argc = mach_trap_table[call_number].mach_trap_arg_count; + + if (argc > 6) { + int copyin_count; + + copyin_count = (argc - 6) * sizeof(uint64_t); + + if (copyin((user_addr_t)(regs->isf.rsp + sizeof(user_addr_t)), (char *)&regs->v_arg6, copyin_count)) { + regs->rax = KERN_INVALID_ARGUMENT; + + thread_exception_return(); + /* NOTREACHED */ + } + } + regs->rax = (uint64_t)mach_call((void *)(&regs->rdi)); + KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_EXCP_SC,(call_number)) | DBG_FUNC_END, - retval, 0, 0, 0, 0); + (int)regs->rax, 0, 0, 0, 0); - return retval; + thread_exception_return(); + /* NOTREACHED */ } + + /* * thread_setuserstack: * @@ -807,9 +755,19 @@ thread_setuserstack( thread_t thread, mach_vm_address_t user_stack) { - struct i386_saved_state *ss = get_user_regs(thread); + if (thread_is_64bit(thread)) { + x86_saved_state64_t *iss64; + + iss64 = USER_REGS64(thread); - ss->uesp = CAST_DOWN(unsigned int,user_stack); + iss64->isf.rsp = (uint64_t)user_stack; + } else { + x86_saved_state32_t *iss32; + + iss32 = USER_REGS32(thread); + + iss32->uesp = CAST_DOWN(unsigned int, user_stack); + } } /* @@ -823,10 +781,23 @@ thread_adjuserstack( thread_t thread, int adjust) { - struct i386_saved_state *ss = get_user_regs(thread); + if (thread_is_64bit(thread)) { + x86_saved_state64_t *iss64; - ss->uesp += adjust; - return CAST_USER_ADDR_T(ss->uesp); + iss64 = USER_REGS64(thread); + + iss64->isf.rsp += adjust; + + return iss64->isf.rsp; + } else { + x86_saved_state32_t *iss32; + + iss32 = USER_REGS32(thread); + + iss32->uesp += adjust; + + return CAST_USER_ADDR_T(iss32->uesp); + } } /* @@ -836,12 +807,55 @@ thread_adjuserstack( * dependent thread state info. 
*/ void -thread_setentrypoint( - thread_t thread, - mach_vm_address_t entry) +thread_setentrypoint(thread_t thread, mach_vm_address_t entry) { - struct i386_saved_state *ss = get_user_regs(thread); + if (thread_is_64bit(thread)) { + x86_saved_state64_t *iss64; - ss->eip = CAST_DOWN(unsigned int,entry); -} + iss64 = USER_REGS64(thread); + + iss64->isf.rip = (uint64_t)entry; + } else { + x86_saved_state32_t *iss32; + + iss32 = USER_REGS32(thread); + + iss32->eip = CAST_DOWN(unsigned int, entry); + } +} + + +void +thread_setsinglestep(thread_t thread, int on) +{ + if (thread_is_64bit(thread)) { + x86_saved_state64_t *iss64; + + iss64 = USER_REGS64(thread); + + if (on) + iss64->isf.rflags |= EFL_TF; + else + iss64->isf.rflags &= ~EFL_TF; + } else { + x86_saved_state32_t *iss32; + + iss32 = USER_REGS32(thread); + + if (on) + iss32->efl |= EFL_TF; + else + iss32->efl &= ~EFL_TF; + } +} + + + +/* XXX this should be a struct savearea so that CHUD will work better on x86 */ +void * +find_user_regs( + thread_t thread) +{ + return USER_STATE(thread); +} diff --git a/osfmk/i386/commpage/atomic.s b/osfmk/i386/commpage/atomic.s index ed50f43d8..790e08f38 100644 --- a/osfmk/i386/commpage/atomic.s +++ b/osfmk/i386/commpage/atomic.s @@ -1,5 +1,5 @@ /* - * Copyright (c) 2004 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2004-2006 Apple Computer, Inc. All rights reserved. 
* * @APPLE_LICENSE_HEADER_START@ * @@ -93,14 +93,14 @@ Lcompare_and_swap64_up: Lbit_test_and_set_mp: .long _COMM_PAGE_BTS+4 lock - bts %eax, (%edx) + btsl %eax, (%edx) ret COMMPAGE_DESCRIPTOR(bit_test_and_set_mp,_COMM_PAGE_BTS,0,kUP) Lbit_test_and_set_up: .long _COMM_PAGE_BTS+4 - bts %eax, (%edx) + btsl %eax, (%edx) ret COMMPAGE_DESCRIPTOR(bit_test_and_set_up,_COMM_PAGE_BTS,kUP,0) @@ -114,14 +114,14 @@ Lbit_test_and_set_up: Lbit_test_and_clear_mp: .long _COMM_PAGE_BTC+4 lock - btc %eax, (%edx) + btrl %eax, (%edx) ret COMMPAGE_DESCRIPTOR(bit_test_and_clear_mp,_COMM_PAGE_BTC,0,kUP) Lbit_test_and_clear_up: .long _COMM_PAGE_BTC+4 - btc %eax, (%edx) + btrl %eax, (%edx) ret COMMPAGE_DESCRIPTOR(bit_test_and_clear_up,_COMM_PAGE_BTC,kUP,0) @@ -147,3 +147,153 @@ Latomic_add32_up: ret COMMPAGE_DESCRIPTOR(atomic_add32_up,_COMM_PAGE_ATOMIC_ADD32,kUP,0) + + +/************************* x86_64 versions follow **************************/ + + +// This is a subroutine used by: + +// bool OSAtomicCompareAndSwap32( int32_t old, int32_t new, int32_t *value); +// int32_t OSAtomicAnd32( int32_t mask, int32_t *value); +// int32_t OSAtomicOr32( int32_t mask, int32_t *value); +// int32_t OSAtomicXor32( int32_t mask, int32_t *value); + +// It assumes: old -> %rdi (ie, it follows the ABI parameter conventions) +// new -> %rsi +// value -> %rdx +// on success: returns with ZF set +// on failure: returns with *value in %eax, ZF clear + + .code64 +Lcompare_and_swap32_mp_64: + movl %edi,%eax // put old value where "cmpxchg" wants it + lock + cmpxchgl %esi, (%rdx) + ret + + COMMPAGE_DESCRIPTOR(compare_and_swap32_mp_64,_COMM_PAGE_COMPARE_AND_SWAP32,0,kUP) + + .code64 +Lcompare_and_swap32_up_64: + movl %edi,%eax // put old value where "cmpxchg" wants it + cmpxchgl %esi, (%rdx) + ret + + COMMPAGE_DESCRIPTOR(compare_and_swap32_up_64,_COMM_PAGE_COMPARE_AND_SWAP32,kUP,0) + +// This is a subroutine used by: +// bool OSAtomicCompareAndSwap64( int64_t old, int64_t new, int64_t *value); + +// It assumes: old 
-> %rdi (ie, it follows the ABI parameter conventions) +// new -> %rsi +// value -> %rdx +// on success: returns with ZF set +// on failure: returns with *value in %rax, ZF clear + + .code64 +Lcompare_and_swap64_mp_64: + movq %rdi,%rax // put old value where "cmpxchg" wants it + lock + cmpxchgq %rsi, (%rdx) + ret + + COMMPAGE_DESCRIPTOR(compare_and_swap64_mp_64,_COMM_PAGE_COMPARE_AND_SWAP64,0,kUP) + + .code64 +Lcompare_and_swap64_up_64: + movq %rdi,%rax // put old value where "cmpxchg" wants it + cmpxchgq %rsi, (%rdx) + ret + + COMMPAGE_DESCRIPTOR(compare_and_swap64_up_64,_COMM_PAGE_COMPARE_AND_SWAP64,kUP,0) + +// This is a subroutine used by: +// bool OSAtomicTestAndSet( uint32_t n, void *value ); +// It is called with standard register conventions: +// n = %rdi +// value = %rsi +// Returns: old value of bit in CF + + .code64 +Lbit_test_and_set_mp_64: + lock + btsl %edi, (%rsi) + ret + + COMMPAGE_DESCRIPTOR(bit_test_and_set_mp_64,_COMM_PAGE_BTS,0,kUP) + + .code64 +Lbit_test_and_set_up_64: + btsl %edi, (%rsi) + ret + + COMMPAGE_DESCRIPTOR(bit_test_and_set_up_64,_COMM_PAGE_BTS,kUP,0) + +// This is a subroutine used by: +// bool OSAtomicTestAndClear( uint32_t n, void *value ); +// It is called with standard register conventions: +// n = %rdi +// value = %rsi +// Returns: old value of bit in CF + + .code64 +Lbit_test_and_clear_mp_64: + lock + btrl %edi, (%rsi) + ret + + COMMPAGE_DESCRIPTOR(bit_test_and_clear_mp_64,_COMM_PAGE_BTC,0,kUP) + + .code64 +Lbit_test_and_clear_up_64: + btrl %edi, (%rsi) + ret + + COMMPAGE_DESCRIPTOR(bit_test_and_clear_up_64,_COMM_PAGE_BTC,kUP,0) + +// This is a subroutine used by: +// int32_t OSAtomicAdd32( int32_t amt, int32_t *value ); +// It is called with standard register conventions: +// amt = %rdi +// value = %rsi +// Returns: old value in %edi +// NB: OSAtomicAdd32 returns the new value, so clients will add amt to %edi + + .code64 +Latomic_add32_mp_64: + lock + xaddl %edi, (%rsi) + ret + + 
COMMPAGE_DESCRIPTOR(atomic_add32_mp_64,_COMM_PAGE_ATOMIC_ADD32,0,kUP) + + .code64 +Latomic_add32_up_64: + xaddl %edi, (%rsi) + ret + + COMMPAGE_DESCRIPTOR(atomic_add32_up_64,_COMM_PAGE_ATOMIC_ADD32,kUP,0) + +// This is a subroutine used by: +// int64_t OSAtomicAdd64( int64_t amt, int64_t *value ); +// It is called with standard register conventions: +// amt = %rdi +// value = %rsi +// Returns: old value in %rdi +// NB: OSAtomicAdd64 returns the new value, so clients will add amt to %rdi + + .code64 +Latomic_add64_mp_64: + lock + xaddq %rdi, (%rsi) + ret + + COMMPAGE_DESCRIPTOR(atomic_add64_mp_64,_COMM_PAGE_ATOMIC_ADD64,0,kUP) + + .code64 +Latomic_add64_up_64: + xaddq %rdi, (%rsi) + ret + + COMMPAGE_DESCRIPTOR(atomic_add64_up_64,_COMM_PAGE_ATOMIC_ADD64,kUP,0) diff --git a/osfmk/i386/commpage/bcopy_scalar.s b/osfmk/i386/commpage/bcopy_scalar.s index 7f322f548..d89188d7c 100644 --- a/osfmk/i386/commpage/bcopy_scalar.s +++ b/osfmk/i386/commpage/bcopy_scalar.s @@ -1,5 +1,5 @@ /* - * Copyright (c) 2003 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2003-2005 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -68,10 +68,12 @@ .text .align 5, 0x90 Lbcopy_scalar: + pushl %ebp /* set up a frame for backtraces */ + movl %esp,%ebp pushl %esi pushl %edi - movl 12(%esp),%esi - movl 16(%esp),%edi + movl 8(%ebp),%esi + movl 12(%ebp),%edi jmp 1f /* ** These need to be 32 bytes from Lbcopy_scalar @@ -79,13 +81,15 @@ Lbcopy_scalar: .align 5, 0x90 Lmemcpy_scalar: Lmemmove_scalar: + pushl %ebp /* set up a frame for backtraces */ + movl %esp,%ebp pushl %esi pushl %edi - movl 12(%esp),%edi - movl 16(%esp),%esi + movl 8(%ebp),%edi + movl 12(%ebp),%esi movl %edi,%eax 1: - movl 20(%esp),%ecx + movl 16(%ebp),%ecx movl %edi,%edx subl %esi,%edx cmpl %ecx,%edx /* overlapping? */ @@ -101,6 +105,7 @@ Lmemmove_scalar: movsb popl %edi popl %esi + popl %ebp ret 2: addl %ecx,%edi /* copy backwards. 
*/ @@ -120,7 +125,8 @@ Lmemmove_scalar: movsl popl %edi popl %esi + popl %ebp cld ret - COMMPAGE_DESCRIPTOR(bcopy_scalar,_COMM_PAGE_BCOPY,0,0) + COMMPAGE_DESCRIPTOR(bcopy_scalar,_COMM_PAGE_BCOPY,0,kHasSSE2+kHasSupplementalSSE3) diff --git a/osfmk/i386/commpage/bcopy_sse3.s b/osfmk/i386/commpage/bcopy_sse3.s new file mode 100644 index 000000000..21d7b8a12 --- /dev/null +++ b/osfmk/i386/commpage/bcopy_sse3.s @@ -0,0 +1,470 @@ +/* + * Copyright (c) 2005 Apple Computer, Inc. All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * The contents of this file constitute Original Code as defined in and + * are subject to the Apple Public Source License Version 1.1 (the + * "License"). You may not use this file except in compliance with the + * License. Please obtain a copy of the License at + * http://www.apple.com/publicsource and read it before using this file. + * + * This Original Code and all software distributed under the License are + * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the + * License for the specific language governing rights and limitations + * under the License. + * + * @APPLE_LICENSE_HEADER_END@ + */ + +#include +#include + +/* + * The bcopy/memcpy loops, tuned for Pentium-M class processors with SSE3 + * and 64-byte cache lines, such as Core and Core 2. 
+ * + * The following #defines are tightly coupled to the u-architecture: + */ + +#define kShort 80 // too short to bother with SSE (must be >=80) +#define kVeryLong (500*1024) // large enough for non-temporal stores (must be >= 8192) +#define kBigChunk (256*1024) // outer loop chunk size for kVeryLong sized operands +#define kFastUCode (16*1024) // cutoff for microcode fastpath for "rep/movsl" + + +// void bcopy(const void *src, void *dst, size_t len); + + .text + .align 5, 0x90 +Lbcopy_sse3: // void bcopy(const void *src, void *dst, size_t len) + pushl %ebp // set up a frame for backtraces + movl %esp,%ebp + pushl %esi + pushl %edi + movl 8(%ebp),%esi // get source ptr + movl 12(%ebp),%edi // get dest ptr + jmp Ljoin + +// +// void *memcpy(void *dst, const void *src, size_t len); +// void *memmove(void *dst, const void *src, size_t len); +// +// NB: These need to be 32 bytes from bcopy(): +// + + .align 5, 0x90 +Lmemcpy: // void *memcpy(void *dst, const void *src, size_t len) +Lmemmove: // void *memmove(void *dst, const void *src, size_t len) + pushl %ebp // set up a frame for backtraces + movl %esp,%ebp + pushl %esi + pushl %edi + movl 8(%ebp),%edi // get dest ptr + movl 12(%ebp),%esi // get source ptr + +Ljoin: // here from bcopy() with esi and edi loaded + movl 16(%ebp),%ecx // get length + movl %edi,%edx + subl %esi,%edx // (dest - source) + cmpl %ecx,%edx // must move in reverse if (dest - source) < length + jb LReverseIsland +Lrejoin: // here from very-long-operand copies + cmpl $(kShort),%ecx // long enough to bother with SSE? + ja LNotShort // yes + +// Handle short forward copies. As the most common case, this is the fall-through path. 
+// ecx = length (<= kShort) +// esi = source ptr +// edi = dest ptr + +Lshort: + movl %ecx,%edx // copy length + shrl $2,%ecx // get #doublewords + jz LLeftovers +2: // loop copying doublewords + movl (%esi),%eax + addl $4,%esi + movl %eax,(%edi) + addl $4,%edi + dec %ecx + jnz 2b +LLeftovers: // handle leftover bytes (0..3) in last word + andl $3,%edx // any leftover bytes? + jz 5f +4: // loop copying bytes + movb (%esi),%al + inc %esi + movb %al,(%edi) + inc %edi + dec %edx + jnz 4b +5: + movl 8(%ebp),%eax // get return value (dst ptr) for memcpy/memmove + popl %edi + popl %esi + popl %ebp + ret + + +LReverseIsland: // keep the "jb" above a short branch... + jmp LReverse // ...because reverse moves are uncommon + + +// Handle forward moves that are long enough to justify use of SSE3. +// First, 16-byte align the destination. +// ecx = length (> kShort) +// esi = source ptr +// edi = dest ptr + +LNotShort: + cmpl $(kVeryLong),%ecx // long enough to justify heavyweight loops? + movl %edi,%edx // copy destination + jae LVeryLong // use very-long-operand path + negl %edx + andl $15,%edx // get #bytes to align destination + jz LDestAligned // already aligned + subl %edx,%ecx // decrement length +1: // loop copying 1..15 bytes + movb (%esi),%al + inc %esi + movb %al,(%edi) + inc %edi + dec %edx + jnz 1b + +// Destination is now aligned. Prepare for forward loops over 64-byte chunks. +// Since kShort>=80 and we've moved at most 15 bytes already, there is at least one chunk. + +LDestAligned: + movl %ecx,%edx // copy length + movl %ecx,%eax // twice + andl $63,%ecx // get remaining bytes for Lshort + andl $-64,%edx // get number of bytes we will copy in inner loop + addl %edx,%esi // point to 1st byte not copied + addl %edx,%edi + negl %edx // now generate offset to 1st byte to be copied + testl $15,%esi // is source aligned too? + jnz LUnalignedLoop // no + + + cmpl $(kFastUCode),%eax // long enough for the fastpath in microcode? 
+ jb LAlignedLoop // no, use SSE + cld // we'll move forward + movl %eax,%ecx // copy length again + shrl $2,%ecx // compute #words to move + addl %edx,%esi // restore ptrs to 1st byte of source and dest + addl %edx,%edi + rep // the u-code will optimize this + movsl + movl %eax,%edx // original length + jmp LLeftovers // handle 0..3 leftover bytes + + +// Forward aligned loop for medium length operands (kShort < n < kVeryLong). + + .align 4,0x90 // 16-byte align inner loops +LAlignedLoop: // loop over 64-byte chunks + movdqa (%esi,%edx),%xmm0 + movdqa 16(%esi,%edx),%xmm1 + movdqa 32(%esi,%edx),%xmm2 + movdqa 48(%esi,%edx),%xmm3 + + movdqa %xmm0,(%edi,%edx) + movdqa %xmm1,16(%edi,%edx) + movdqa %xmm2,32(%edi,%edx) + movdqa %xmm3,48(%edi,%edx) + + addl $64,%edx + jnz LAlignedLoop + + jmp Lshort // copy remaining 0..63 bytes and done + + +// Forward unaligned loop for medium length operands (kShort < n < kVeryLong). +// Note that LDDQU==MOVDQU on these machines, ie we don't care when we cross +// source cache lines. + + .align 4,0x90 // 16-byte align inner loops +LUnalignedLoop: // loop over 64-byte chunks + movdqu (%esi,%edx),%xmm0 // the loads are unaligned + movdqu 16(%esi,%edx),%xmm1 + movdqu 32(%esi,%edx),%xmm2 + movdqu 48(%esi,%edx),%xmm3 + + movdqa %xmm0,(%edi,%edx) // we can use aligned stores + movdqa %xmm1,16(%edi,%edx) + movdqa %xmm2,32(%edi,%edx) + movdqa %xmm3,48(%edi,%edx) + + addl $64,%edx + jnz LUnalignedLoop + + jmp Lshort // copy remaining 0..63 bytes and done + + +// Very long forward moves. These are at least several pages, so we loop over big +// chunks of memory (kBigChunk in size.) We first prefetch the chunk, and then copy +// it using non-temporal stores. Hopefully all the reads occur in the prefetch loop, +// so the copy loop reads from L2 and writes directly to memory (with write combining.) +// This minimizes bus turnaround and maintains good DRAM page locality. 
+// Note that for this scheme to work, kVeryLong must be a large fraction of L2 cache +// size. Otherwise, it is counter-productive to bypass L2 on the stores. +// ecx = length (>= kVeryLong bytes) +// edi = dest (not yet aligned) +// esi = source + +LVeryLong: + pushl %ebx // we'll need to use this + movl %edi,%ebx // copy dest ptr + negl %ebx + andl $63,%ebx // get #bytes to cache line align destination + jz LBigChunkLoop // already aligned + +// Cache line align destination, so non-temporal stores in copy loops work right. + + pushl %ecx // save total length remaining + pushl %ebx // arg3 - #bytes to align destination (1..63) + pushl %esi // arg2 - source + pushl %edi // arg1 - dest + call Lmemcpy // align the destination + movl 12(%esp),%ecx // recover total length + addl $16,%esp + addl %ebx,%esi // adjust ptrs and lengths past copy + addl %ebx,%edi + subl %ebx,%ecx + +// Loop over big chunks. +// ecx = length remaining (>= 4096) +// edi = dest (64-byte aligned) +// esi = source (may be unaligned) + +LBigChunkLoop: + movl $(kBigChunk),%edx // assume we can do a full chunk + cmpl %edx,%ecx // do we have a full chunk left to do? + cmovbl %ecx,%edx // if not, only move what we have left + andl $-4096,%edx // we work in page multiples + xor %eax,%eax // initialize chunk offset + jmp LTouchLoop + +// Because the source may be unaligned, we use byte loads to touch. 
+// ecx = length remaining (including this chunk) +// edi = ptr to start of dest chunk +// esi = ptr to start of source chunk +// edx = chunk length (multiples of pages) +// ebx = scratch reg used to read a byte of each cache line +// eax = chunk offset + + .align 4,0x90 // 16-byte align inner loops +LTouchLoop: + movzb (%esi,%eax),%ebx // touch line 0, 2, 4, or 6 of page + movzb 1*64(%esi,%eax),%ebx // touch line 1, 3, 5, or 7 + movzb 8*64(%esi,%eax),%ebx // touch line 8, 10, 12, or 14 + movzb 9*64(%esi,%eax),%ebx // etc + + movzb 16*64(%esi,%eax),%ebx + movzb 17*64(%esi,%eax),%ebx + movzb 24*64(%esi,%eax),%ebx + movzb 25*64(%esi,%eax),%ebx + + movzb 32*64(%esi,%eax),%ebx + movzb 33*64(%esi,%eax),%ebx + movzb 40*64(%esi,%eax),%ebx + movzb 41*64(%esi,%eax),%ebx + + movzb 48*64(%esi,%eax),%ebx + movzb 49*64(%esi,%eax),%ebx + movzb 56*64(%esi,%eax),%ebx + movzb 57*64(%esi,%eax),%ebx + + subl $-128,%eax // next slice of page (adding 128 w 8-bit immediate) + testl $512,%eax // done with this page? + jz LTouchLoop // no, next of four slices + addl $(4096-512),%eax // move on to next page + cmpl %eax,%edx // done with this chunk? + jnz LTouchLoop // no, do next page + +// The chunk has been pre-fetched, now copy it using non-temporal stores. +// There are two copy loops, depending on whether the source is 16-byte aligned +// or not. + + addl %edx,%esi // increment ptrs by chunk length + addl %edx,%edi + subl %edx,%ecx // adjust remaining length + negl %edx // prepare loop index (counts up to 0) + testl $15,%esi // is source 16-byte aligned? 
+ jnz LVeryLongUnaligned // source is not aligned + jmp LVeryLongAligned + + .align 4,0x90 // 16-byte align inner loops +LVeryLongAligned: // aligned loop over 128-bytes + movdqa (%esi,%edx),%xmm0 + movdqa 16(%esi,%edx),%xmm1 + movdqa 32(%esi,%edx),%xmm2 + movdqa 48(%esi,%edx),%xmm3 + movdqa 64(%esi,%edx),%xmm4 + movdqa 80(%esi,%edx),%xmm5 + movdqa 96(%esi,%edx),%xmm6 + movdqa 112(%esi,%edx),%xmm7 + + movntdq %xmm0,(%edi,%edx) + movntdq %xmm1,16(%edi,%edx) + movntdq %xmm2,32(%edi,%edx) + movntdq %xmm3,48(%edi,%edx) + movntdq %xmm4,64(%edi,%edx) + movntdq %xmm5,80(%edi,%edx) + movntdq %xmm6,96(%edi,%edx) + movntdq %xmm7,112(%edi,%edx) + + subl $-128,%edx // add 128 with an 8-bit immediate + jnz LVeryLongAligned + jmp LVeryLongChunkEnd + + .align 4,0x90 // 16-byte align inner loops +LVeryLongUnaligned: // unaligned loop over 128-bytes + movdqu (%esi,%edx),%xmm0 + movdqu 16(%esi,%edx),%xmm1 + movdqu 32(%esi,%edx),%xmm2 + movdqu 48(%esi,%edx),%xmm3 + movdqu 64(%esi,%edx),%xmm4 + movdqu 80(%esi,%edx),%xmm5 + movdqu 96(%esi,%edx),%xmm6 + movdqu 112(%esi,%edx),%xmm7 + + movntdq %xmm0,(%edi,%edx) + movntdq %xmm1,16(%edi,%edx) + movntdq %xmm2,32(%edi,%edx) + movntdq %xmm3,48(%edi,%edx) + movntdq %xmm4,64(%edi,%edx) + movntdq %xmm5,80(%edi,%edx) + movntdq %xmm6,96(%edi,%edx) + movntdq %xmm7,112(%edi,%edx) + + subl $-128,%edx // add 128 with an 8-bit immediate + jnz LVeryLongUnaligned + +LVeryLongChunkEnd: + cmpl $4096,%ecx // at least another page to go? + jae LBigChunkLoop // yes + + sfence // required by non-temporal stores + popl %ebx + jmp Lrejoin // handle remaining (0..4095) bytes + + +// Reverse moves. +// ecx = length +// esi = source ptr +// edi = dest ptr + +LReverse: + addl %ecx,%esi // point to end of strings + addl %ecx,%edi + cmpl $(kShort),%ecx // long enough to bother with SSE? + ja LReverseNotShort // yes + +// Handle reverse short copies. 
+// ecx = length +// esi = one byte past end of source +// edi = one byte past end of dest + +LReverseShort: + movl %ecx,%edx // copy length + shrl $2,%ecx // #words + jz 3f +1: + subl $4,%esi + movl (%esi),%eax + subl $4,%edi + movl %eax,(%edi) + dec %ecx + jnz 1b +3: + andl $3,%edx // bytes? + jz 5f +4: + dec %esi + movb (%esi),%al + dec %edi + movb %al,(%edi) + dec %edx + jnz 4b +5: + movl 8(%ebp),%eax // get return value (dst ptr) for memcpy/memmove + popl %edi + popl %esi + popl %ebp + ret + +// Handle a reverse move long enough to justify using SSE. +// ecx = length +// esi = one byte past end of source +// edi = one byte past end of dest + +LReverseNotShort: + movl %edi,%edx // copy destination + andl $15,%edx // get #bytes to align destination + je LReverseDestAligned // already aligned + subl %edx,%ecx // adjust length +1: // loop copying 1..15 bytes + dec %esi + movb (%esi),%al + dec %edi + movb %al,(%edi) + dec %edx + jnz 1b + +// Destination is now aligned. Prepare for reverse loops. + +LReverseDestAligned: + movl %ecx,%edx // copy length + andl $63,%ecx // get remaining bytes for LReverseShort + andl $-64,%edx // get number of bytes we will copy in inner loop + subl %edx,%esi // point to endpoint of copy + subl %edx,%edi + testl $15,%esi // is source aligned too? + jnz LReverseUnalignedLoop // no + jmp LReverseAlignedLoop // use aligned loop + + .align 4,0x90 // 16-byte align inner loops +LReverseAlignedLoop: // loop over 64-byte chunks + movdqa -16(%esi,%edx),%xmm0 + movdqa -32(%esi,%edx),%xmm1 + movdqa -48(%esi,%edx),%xmm2 + movdqa -64(%esi,%edx),%xmm3 + + movdqa %xmm0,-16(%edi,%edx) + movdqa %xmm1,-32(%edi,%edx) + movdqa %xmm2,-48(%edi,%edx) + movdqa %xmm3,-64(%edi,%edx) + + subl $64,%edx + jne LReverseAlignedLoop + + jmp LReverseShort // copy remaining 0..63 bytes and done + + +// Reverse, unaligned loop. LDDQU==MOVDQU on these machines. 
+ + .align 4,0x90 // 16-byte align inner loops +LReverseUnalignedLoop: // loop over 64-byte chunks + movdqu -16(%esi,%edx),%xmm0 + movdqu -32(%esi,%edx),%xmm1 + movdqu -48(%esi,%edx),%xmm2 + movdqu -64(%esi,%edx),%xmm3 + + movdqa %xmm0,-16(%edi,%edx) + movdqa %xmm1,-32(%edi,%edx) + movdqa %xmm2,-48(%edi,%edx) + movdqa %xmm3,-64(%edi,%edx) + + subl $64,%edx + jne LReverseUnalignedLoop + + jmp LReverseShort // copy remaining 0..63 bytes and done + + + COMMPAGE_DESCRIPTOR(bcopy_sse3,_COMM_PAGE_BCOPY,kHasSSE2+kCache64,kHasSupplementalSSE3) diff --git a/osfmk/i386/commpage/bcopy_sse4.s b/osfmk/i386/commpage/bcopy_sse4.s new file mode 100644 index 000000000..a56ee219d --- /dev/null +++ b/osfmk/i386/commpage/bcopy_sse4.s @@ -0,0 +1,799 @@ +/* + * Copyright (c) 2006 Apple Computer, Inc. All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * The contents of this file constitute Original Code as defined in and + * are subject to the Apple Public Source License Version 1.1 (the + * "License"). You may not use this file except in compliance with the + * License. Please obtain a copy of the License at + * http://www.apple.com/publicsource and read it before using this file. + * + * This Original Code and all software distributed under the License are + * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the + * License for the specific language governing rights and limitations + * under the License. + * + * @APPLE_LICENSE_HEADER_END@ + */ + +#include +#include + +/* + * The bcopy/memcpy loops, tuned for Pentium-M class processors with SSE4 + * and 64-byte cache lines. 
+ * + * The following #defines are tightly coupled to the u-architecture: + */ + +#define kShort 80 // too short to bother with SSE (must be >=80) +#define kVeryLong (500*1024) // large enough for non-temporal stores (must be >= 8192) +#define kFastUCode ((16*1024)-15) // cutoff for microcode fastpath for "rep/movsl" + + +// void bcopy(const void *src, void *dst, size_t len); + + .text + .align 5, 0x90 +LZero: +Lbcopy_sse4: // void bcopy(const void *src, void *dst, size_t len) + pushl %ebp // set up a frame for backtraces + movl %esp,%ebp + pushl %esi + pushl %edi + movl 8(%ebp),%esi // get source ptr + movl 12(%ebp),%edi // get dest ptr + movl 16(%ebp),%ecx // get length + movl %edi,%edx + subl %esi,%edx // (dest - source) + cmpl %ecx,%edx // must move in reverse if (dest - source) < length + jb LReverseIsland + cmpl $(kShort),%ecx // long enough to bother with SSE? + jbe Lshort // no + jmp LNotShort + +// +// void *memcpy(void *dst, const void *src, size_t len); +// void *memmove(void *dst, const void *src, size_t len); +// +// NB: These need to be 32 bytes from bcopy(): +// + + .align 5, 0x90 +Lmemcpy: // void *memcpy(void *dst, const void *src, size_t len) +Lmemmove: // void *memmove(void *dst, const void *src, size_t len) + pushl %ebp // set up a frame for backtraces + movl %esp,%ebp + pushl %esi + pushl %edi + movl 8(%ebp),%edi // get dest ptr + movl 12(%ebp),%esi // get source ptr + movl 16(%ebp),%ecx // get length + movl %edi,%edx + subl %esi,%edx // (dest - source) + cmpl %ecx,%edx // must move in reverse if (dest - source) < length + jb LReverseIsland + cmpl $(kShort),%ecx // long enough to bother with SSE? + ja LNotShort // yes + +// Handle short forward copies. As the most common case, this is the fall-through path. 
+// ecx = length (<= kShort) +// esi = source ptr +// edi = dest ptr + +Lshort: + movl %ecx,%edx // copy length + shrl $2,%ecx // get #doublewords + jz LLeftovers +2: // loop copying doublewords + movl (%esi),%eax + addl $4,%esi + movl %eax,(%edi) + addl $4,%edi + dec %ecx + jnz 2b +LLeftovers: // handle leftover bytes (0..3) in last word + andl $3,%edx // any leftover bytes? + jz Lexit +4: // loop copying bytes + movb (%esi),%al + inc %esi + movb %al,(%edi) + inc %edi + dec %edx + jnz 4b +Lexit: + movl 8(%ebp),%eax // get return value (dst ptr) for memcpy/memmove + popl %edi + popl %esi + popl %ebp + ret + + +LReverseIsland: // keep the "jb" above a short branch... + jmp LReverse // ...because reverse moves are uncommon + + +// Handle forward moves that are long enough to justify use of SSE3. +// First, 16-byte align the destination. +// ecx = length (> kShort) +// esi = source ptr +// edi = dest ptr + +LNotShort: + cmpl $(kVeryLong),%ecx // long enough to justify heavyweight loops? + movl %edi,%edx // copy destination + jae LVeryLong // use very-long-operand path + negl %edx + andl $15,%edx // get #bytes to align destination + jz LDestAligned // already aligned + subl %edx,%ecx // decrement length +1: // loop copying 1..15 bytes + movb (%esi),%al + inc %esi + movb %al,(%edi) + inc %edi + dec %edx + jnz 1b + +// Destination is now aligned. Dispatch to one of sixteen loops over 64-byte chunks, +// based on the alignment of the source. All vector loads and stores are aligned. +// Even though this means we have to shift and repack vectors, doing so is much faster +// than unaligned loads. Since kShort>=80 and we've moved at most 15 bytes already, +// there is at least one chunk. 
When we enter the copy loops, the following registers +// are set up: +// ecx = residual length (0..63) +// edx = -(length to move), a multiple of 64 +// esi = ptr to 1st source byte not to move (unaligned) +// edi = ptr to 1st dest byte not to move (aligned) + +LDestAligned: + movl %ecx,%edx // copy length + movl %esi,%eax // copy source address + andl $63,%ecx // get remaining bytes for Lshort + andl $-64,%edx // get number of bytes we will copy in inner loop + andl $15,%eax // mask to low 4 bits of source address + addl %edx,%esi // point to 1st byte not copied + addl %edx,%edi + negl %edx // now generate offset to 1st byte to be copied + movl (_COMM_PAGE_BCOPY+LTable-LZero)(,%eax,4),%eax + jmp *%eax + + .align 2 +LTable: // table of copy loop addresses + .long LMod0 + _COMM_PAGE_BCOPY - LZero + .long LMod1 + _COMM_PAGE_BCOPY - LZero + .long LMod2 + _COMM_PAGE_BCOPY - LZero + .long LMod3 + _COMM_PAGE_BCOPY - LZero + .long LMod4 + _COMM_PAGE_BCOPY - LZero + .long LMod5 + _COMM_PAGE_BCOPY - LZero + .long LMod6 + _COMM_PAGE_BCOPY - LZero + .long LMod7 + _COMM_PAGE_BCOPY - LZero + .long LMod8 + _COMM_PAGE_BCOPY - LZero + .long LMod9 + _COMM_PAGE_BCOPY - LZero + .long LMod10 + _COMM_PAGE_BCOPY - LZero + .long LMod11 + _COMM_PAGE_BCOPY - LZero + .long LMod12 + _COMM_PAGE_BCOPY - LZero + .long LMod13 + _COMM_PAGE_BCOPY - LZero + .long LMod14 + _COMM_PAGE_BCOPY - LZero + .long LMod15 + _COMM_PAGE_BCOPY - LZero + + +// Very long forward moves. These are at least several pages. They are special cased +// and aggressively optimized, not so much because they are common or useful, but +// because they are subject to benchmark. There isn't enough room for them in the +// area reserved on the commpage for bcopy, so we put them elsewhere. We call +// the longcopy routine using the normal ABI. 
+ +LVeryLong: + pushl %ecx // length (>= kVeryLong) + pushl %esi // source ptr + pushl %edi // dest ptr + movl $(_COMM_PAGE_LONGCOPY),%eax + call *%eax // do the long copy + addl $12,%esp // pop off our parameters + jmp Lexit + + +// On Pentium-M, the microcode for "rep/movsl" is faster than SSE for 8-byte +// aligned operands from about 32KB up to kVeryLong for the hot cache case, and from +// about 256 bytes up to kVeryLong for cold caches. This is because the microcode +// avoids having to read destination cache lines that will be completely overwritten. +// The cutoff we use (ie, kFastUCode) must somehow balance the two cases, since +// we do not know if the destination is in cache or not. + +Lfastpath: + addl %edx,%esi // restore ptrs to 1st byte of source and dest + addl %edx,%edi + negl %edx // make length positive + orl %edx,%ecx // restore total #bytes remaining to move + cld // we'll move forward + movl %ecx,%edx // copy total length to move + shrl $2,%ecx // compute #words to move + rep // the u-code will optimize this + movsl + jmp LLeftovers // handle 0..3 leftover bytes + + +// Forward loop for medium length operands in which low four bits of %esi == 0000 + +LMod0: + cmpl $(-kFastUCode),%edx // %edx == -length, where (length < kVeryLong) + jle Lfastpath // long enough for fastpath in microcode + jmp 1f + .align 4,0x90 // 16-byte align inner loops +1: // loop over 64-byte chunks + movdqa (%esi,%edx),%xmm0 + movdqa 16(%esi,%edx),%xmm1 + movdqa 32(%esi,%edx),%xmm2 + movdqa 48(%esi,%edx),%xmm3 + + movdqa %xmm0,(%edi,%edx) + movdqa %xmm1,16(%edi,%edx) + movdqa %xmm2,32(%edi,%edx) + movdqa %xmm3,48(%edi,%edx) + + addl $64,%edx + jnz 1b + + jmp Lshort // copy remaining 0..63 bytes and done + + +// Forward loop for medium length operands in which low four bits of %esi == 0001 + +LMod1: + movdqa -1(%esi,%edx),%xmm0 // prime the loop by loading 1st quadword +1: // loop over 64-byte chunks + movdqa 15(%esi,%edx),%xmm1 + movdqa 31(%esi,%edx),%xmm2 + movdqa 
47(%esi,%edx),%xmm3 + movdqa 63(%esi,%edx),%xmm4 + + movdqa %xmm0,%xmm5 + movdqa %xmm4,%xmm0 + + palignr $1,%xmm3,%xmm4 // dest <- shr( dest || source, imm*8 ) + palignr $1,%xmm2,%xmm3 + palignr $1,%xmm1,%xmm2 + palignr $1,%xmm5,%xmm1 + + movdqa %xmm1,(%edi,%edx) + movdqa %xmm2,16(%edi,%edx) + movdqa %xmm3,32(%edi,%edx) + movdqa %xmm4,48(%edi,%edx) + + addl $64,%edx + jnz 1b + + jmp Lshort // copy remaining 0..63 bytes and done + + +// Forward loop for medium length operands in which low four bits of %esi == 0010 + +LMod2: + movdqa -2(%esi,%edx),%xmm0 // prime the loop by loading 1st source dq +1: // loop over 64-byte chunks + movdqa 14(%esi,%edx),%xmm1 + movdqa 30(%esi,%edx),%xmm2 + movdqa 46(%esi,%edx),%xmm3 + movdqa 62(%esi,%edx),%xmm4 + + movdqa %xmm0,%xmm5 + movdqa %xmm4,%xmm0 + + palignr $2,%xmm3,%xmm4 // dest <- shr( dest || source, imm*8 ) + palignr $2,%xmm2,%xmm3 + palignr $2,%xmm1,%xmm2 + palignr $2,%xmm5,%xmm1 + + movdqa %xmm1,(%edi,%edx) + movdqa %xmm2,16(%edi,%edx) + movdqa %xmm3,32(%edi,%edx) + movdqa %xmm4,48(%edi,%edx) + + addl $64,%edx + jnz 1b + + jmp Lshort // copy remaining 0..63 bytes and done + + +// Forward loop for medium length operands in which low four bits of %esi == 0011 + +LMod3: + movdqa -3(%esi,%edx),%xmm0 // prime the loop by loading 1st source dq +1: // loop over 64-byte chunks + movdqa 13(%esi,%edx),%xmm1 + movdqa 29(%esi,%edx),%xmm2 + movdqa 45(%esi,%edx),%xmm3 + movdqa 61(%esi,%edx),%xmm4 + + movdqa %xmm0,%xmm5 + movdqa %xmm4,%xmm0 + + palignr $3,%xmm3,%xmm4 // dest <- shr( dest || source, imm*8 ) + palignr $3,%xmm2,%xmm3 + palignr $3,%xmm1,%xmm2 + palignr $3,%xmm5,%xmm1 + + movdqa %xmm1,(%edi,%edx) + movdqa %xmm2,16(%edi,%edx) + movdqa %xmm3,32(%edi,%edx) + movdqa %xmm4,48(%edi,%edx) + + addl $64,%edx + jnz 1b + + jmp Lshort // copy remaining 0..63 bytes and done + + +// Forward loop for medium length operands in which low four bits of %esi == 0100 +// We use the float single data type in order to use "movss" to merge vectors. 
+ +LMod4: + movaps -4(%esi,%edx),%xmm0 // 4-byte aligned: prime the loop + jmp 1f + .align 4,0x90 +1: // loop over 64-byte chunks + movaps 12(%esi,%edx),%xmm1 + movaps 28(%esi,%edx),%xmm2 + movss %xmm1,%xmm0 // copy low 4 bytes of source into destination + pshufd $(0x39),%xmm0,%xmm0 // rotate right 4 bytes (mask -- 00 11 10 01) + movaps 44(%esi,%edx),%xmm3 + movss %xmm2,%xmm1 + pshufd $(0x39),%xmm1,%xmm1 + movaps 60(%esi,%edx),%xmm4 + movss %xmm3,%xmm2 + pshufd $(0x39),%xmm2,%xmm2 + + movaps %xmm0,(%edi,%edx) + movss %xmm4,%xmm3 + pshufd $(0x39),%xmm3,%xmm3 + movaps %xmm1,16(%edi,%edx) + movaps %xmm2,32(%edi,%edx) + movaps %xmm4,%xmm0 + movaps %xmm3,48(%edi,%edx) + + addl $64,%edx + jnz 1b + + jmp Lshort // copy remaining 0..63 bytes and done + + +// Forward loop for medium length operands in which low four bits of %esi == 0101 + +LMod5: + movdqa -5(%esi,%edx),%xmm0 // prime the loop by loading 1st source dq +1: // loop over 64-byte chunks + movdqa 11(%esi,%edx),%xmm1 + movdqa 27(%esi,%edx),%xmm2 + movdqa 43(%esi,%edx),%xmm3 + movdqa 59(%esi,%edx),%xmm4 + + movdqa %xmm0,%xmm5 + movdqa %xmm4,%xmm0 + + palignr $5,%xmm3,%xmm4 // dest <- shr( dest || source, imm*8 ) + palignr $5,%xmm2,%xmm3 + palignr $5,%xmm1,%xmm2 + palignr $5,%xmm5,%xmm1 + + movdqa %xmm1,(%edi,%edx) + movdqa %xmm2,16(%edi,%edx) + movdqa %xmm3,32(%edi,%edx) + movdqa %xmm4,48(%edi,%edx) + + addl $64,%edx + jnz 1b + + jmp Lshort // copy remaining 0..63 bytes and done + + +// Forward loop for medium length operands in which low four bits of %esi == 0110 + +LMod6: + movdqa -6(%esi,%edx),%xmm0 // prime the loop by loading 1st source dq +1: // loop over 64-byte chunks + movdqa 10(%esi,%edx),%xmm1 + movdqa 26(%esi,%edx),%xmm2 + movdqa 42(%esi,%edx),%xmm3 + movdqa 58(%esi,%edx),%xmm4 + + movdqa %xmm0,%xmm5 + movdqa %xmm4,%xmm0 + + palignr $6,%xmm3,%xmm4 // dest <- shr( dest || source, imm*8 ) + palignr $6,%xmm2,%xmm3 + palignr $6,%xmm1,%xmm2 + palignr $6,%xmm5,%xmm1 + + movdqa %xmm1,(%edi,%edx) + movdqa 
%xmm2,16(%edi,%edx) + movdqa %xmm3,32(%edi,%edx) + movdqa %xmm4,48(%edi,%edx) + + addl $64,%edx + jnz 1b + + jmp Lshort // copy remaining 0..63 bytes and done + + +// Forward loop for medium length operands in which low four bits of %esi == 0111 + +LMod7: + movdqa -7(%esi,%edx),%xmm0 // prime the loop by loading 1st source dq +1: // loop over 64-byte chunks + movdqa 9(%esi,%edx),%xmm1 + movdqa 25(%esi,%edx),%xmm2 + movdqa 41(%esi,%edx),%xmm3 + movdqa 57(%esi,%edx),%xmm4 + + movdqa %xmm0,%xmm5 + movdqa %xmm4,%xmm0 + + palignr $7,%xmm3,%xmm4 // dest <- shr( dest || source, imm*8 ) + palignr $7,%xmm2,%xmm3 + palignr $7,%xmm1,%xmm2 + palignr $7,%xmm5,%xmm1 + + movdqa %xmm1,(%edi,%edx) + movdqa %xmm2,16(%edi,%edx) + movdqa %xmm3,32(%edi,%edx) + movdqa %xmm4,48(%edi,%edx) + + addl $64,%edx + jnz 1b + + jmp Lshort // copy remaining 0..63 bytes and done + + +// Forward loop for medium length operands in which low four bits of %esi == 1000 +// We use the float double data type in order to use "shufpd" to shift by 8 bytes. 
+ +LMod8: + cmpl $(-kFastUCode),%edx // %edx == -length, where (length < kVeryLong) + jle Lfastpath // long enough for fastpath in microcode + movapd -8(%esi,%edx),%xmm0 // 8-byte aligned: prime the loop + jmp 1f + .align 4,0x90 +1: // loop over 64-byte chunks + movapd 8(%esi,%edx),%xmm1 + movapd 24(%esi,%edx),%xmm2 + shufpd $01,%xmm1,%xmm0 // %xmm0 <- shr( %xmm0 || %xmm1, 8 bytes) + movapd 40(%esi,%edx),%xmm3 + shufpd $01,%xmm2,%xmm1 + movapd 56(%esi,%edx),%xmm4 + shufpd $01,%xmm3,%xmm2 + + movapd %xmm0,(%edi,%edx) + shufpd $01,%xmm4,%xmm3 + movapd %xmm1,16(%edi,%edx) + movapd %xmm2,32(%edi,%edx) + movapd %xmm4,%xmm0 + movapd %xmm3,48(%edi,%edx) + + addl $64,%edx + jnz 1b + + jmp Lshort // copy remaining 0..63 bytes and done + + +// Forward loop for medium length operands in which low four bits of %esi == 1001 + +LMod9: + movdqa -9(%esi,%edx),%xmm0 // prime the loop by loading 1st source dq +1: // loop over 64-byte chunks + movdqa 7(%esi,%edx),%xmm1 + movdqa 23(%esi,%edx),%xmm2 + movdqa 39(%esi,%edx),%xmm3 + movdqa 55(%esi,%edx),%xmm4 + + movdqa %xmm0,%xmm5 + movdqa %xmm4,%xmm0 + + palignr $9,%xmm3,%xmm4 // dest <- shr( dest || source, imm*8 ) + palignr $9,%xmm2,%xmm3 + palignr $9,%xmm1,%xmm2 + palignr $9,%xmm5,%xmm1 + + movdqa %xmm1,(%edi,%edx) + movdqa %xmm2,16(%edi,%edx) + movdqa %xmm3,32(%edi,%edx) + movdqa %xmm4,48(%edi,%edx) + + addl $64,%edx + jnz 1b + + jmp Lshort // copy remaining 0..63 bytes and done + + +// Forward loop for medium length operands in which low four bits of %esi == 1010 + +LMod10: + movdqa -10(%esi,%edx),%xmm0 // prime the loop by loading 1st source dq +1: // loop over 64-byte chunks + movdqa 6(%esi,%edx),%xmm1 + movdqa 22(%esi,%edx),%xmm2 + movdqa 38(%esi,%edx),%xmm3 + movdqa 54(%esi,%edx),%xmm4 + + movdqa %xmm0,%xmm5 + movdqa %xmm4,%xmm0 + + palignr $10,%xmm3,%xmm4 // dest <- shr( dest || source, imm*8 ) + palignr $10,%xmm2,%xmm3 + palignr $10,%xmm1,%xmm2 + palignr $10,%xmm5,%xmm1 + + movdqa %xmm1,(%edi,%edx) + movdqa 
%xmm2,16(%edi,%edx) + movdqa %xmm3,32(%edi,%edx) + movdqa %xmm4,48(%edi,%edx) + + addl $64,%edx + jnz 1b + + jmp Lshort // copy remaining 0..63 bytes and done + + +// Forward loop for medium length operands in which low four bits of %esi == 1011 + +LMod11: + movdqa -11(%esi,%edx),%xmm0 // prime the loop by loading 1st source dq +1: // loop over 64-byte chunks + movdqa 5(%esi,%edx),%xmm1 + movdqa 21(%esi,%edx),%xmm2 + movdqa 37(%esi,%edx),%xmm3 + movdqa 53(%esi,%edx),%xmm4 + + movdqa %xmm0,%xmm5 + movdqa %xmm4,%xmm0 + + palignr $11,%xmm3,%xmm4 // dest <- shr( dest || source, imm*8 ) + palignr $11,%xmm2,%xmm3 + palignr $11,%xmm1,%xmm2 + palignr $11,%xmm5,%xmm1 + + movdqa %xmm1,(%edi,%edx) + movdqa %xmm2,16(%edi,%edx) + movdqa %xmm3,32(%edi,%edx) + movdqa %xmm4,48(%edi,%edx) + + addl $64,%edx + jnz 1b + + jmp Lshort // copy remaining 0..63 bytes and done + + +// Forward loop for medium length operands in which low four bits of %esi == 1100 +// We use the float single data type in order to use "movss" to merge vectors. 
+ +LMod12: + movss (%esi,%edx),%xmm0 // prefetch 1st four bytes of source, right justified + jmp 1f + .align 4,0x90 +1: // loop over 64-byte chunks + pshufd $(0x93),4(%esi,%edx),%xmm1 // load and rotate right 12 bytes (mask -- 10 01 00 11) + pshufd $(0x93),20(%esi,%edx),%xmm2 + pshufd $(0x93),36(%esi,%edx),%xmm3 + pshufd $(0x93),52(%esi,%edx),%xmm4 + + movaps %xmm4,%xmm5 + movss %xmm3,%xmm4 // copy low 4 bytes of source into destination + movss %xmm2,%xmm3 + movss %xmm1,%xmm2 + movss %xmm0,%xmm1 + + movaps %xmm1,(%edi,%edx) + movaps %xmm2,16(%edi,%edx) + movaps %xmm5,%xmm0 + movaps %xmm3,32(%edi,%edx) + movaps %xmm4,48(%edi,%edx) + + addl $64,%edx + jnz 1b + + jmp Lshort // copy remaining 0..63 bytes and done + + +// Forward loop for medium length operands in which low four bits of %esi == 1101 + +LMod13: + movdqa -13(%esi,%edx),%xmm0 // prime the loop by loading 1st source dq +1: // loop over 64-byte chunks + movdqa 3(%esi,%edx),%xmm1 + movdqa 19(%esi,%edx),%xmm2 + movdqa 35(%esi,%edx),%xmm3 + movdqa 51(%esi,%edx),%xmm4 + + movdqa %xmm0,%xmm5 + movdqa %xmm4,%xmm0 + + palignr $13,%xmm3,%xmm4 // dest <- shr( dest || source, imm*8 ) + palignr $13,%xmm2,%xmm3 + palignr $13,%xmm1,%xmm2 + palignr $13,%xmm5,%xmm1 + + movdqa %xmm1,(%edi,%edx) + movdqa %xmm2,16(%edi,%edx) + movdqa %xmm3,32(%edi,%edx) + movdqa %xmm4,48(%edi,%edx) + + addl $64,%edx + jnz 1b + + jmp Lshort // copy remaining 0..63 bytes and done + + +// Forward loop for medium length operands in which low four bits of %esi == 1110 + +LMod14: + movdqa -14(%esi,%edx),%xmm0 // prime the loop by loading 1st source dq +1: // loop over 64-byte chunks + movdqa 2(%esi,%edx),%xmm1 + movdqa 18(%esi,%edx),%xmm2 + movdqa 34(%esi,%edx),%xmm3 + movdqa 50(%esi,%edx),%xmm4 + + movdqa %xmm0,%xmm5 + movdqa %xmm4,%xmm0 + + palignr $14,%xmm3,%xmm4 // dest <- shr( dest || source, imm*8 ) + palignr $14,%xmm2,%xmm3 + palignr $14,%xmm1,%xmm2 + palignr $14,%xmm5,%xmm1 + + movdqa %xmm1,(%edi,%edx) + movdqa %xmm2,16(%edi,%edx) + movdqa 
%xmm3,32(%edi,%edx) + movdqa %xmm4,48(%edi,%edx) + + addl $64,%edx + jnz 1b + + jmp Lshort // copy remaining 0..63 bytes and done + + +// Forward loop for medium length operands in which low four bits of %esi == 1111 + +LMod15: + movdqa -15(%esi,%edx),%xmm0 // prime the loop by loading 1st source dq +1: // loop over 64-byte chunks + movdqa 1(%esi,%edx),%xmm1 + movdqa 17(%esi,%edx),%xmm2 + movdqa 33(%esi,%edx),%xmm3 + movdqa 49(%esi,%edx),%xmm4 + + movdqa %xmm0,%xmm5 + movdqa %xmm4,%xmm0 + + palignr $15,%xmm3,%xmm4 // dest <- shr( dest || source, imm*8 ) + palignr $15,%xmm2,%xmm3 + palignr $15,%xmm1,%xmm2 + palignr $15,%xmm5,%xmm1 + + movdqa %xmm1,(%edi,%edx) + movdqa %xmm2,16(%edi,%edx) + movdqa %xmm3,32(%edi,%edx) + movdqa %xmm4,48(%edi,%edx) + + addl $64,%edx + jnz 1b + + jmp Lshort // copy remaining 0..63 bytes and done + + +// Reverse moves. These are not optimized as aggressively as their forward +// counterparts, as they are only used with destructive overlap. +// ecx = length +// esi = source ptr +// edi = dest ptr + +LReverse: + addl %ecx,%esi // point to end of strings + addl %ecx,%edi + cmpl $(kShort),%ecx // long enough to bother with SSE? + ja LReverseNotShort // yes + +// Handle reverse short copies. +// ecx = length +// esi = one byte past end of source +// edi = one byte past end of dest + +LReverseShort: + movl %ecx,%edx // copy length + shrl $2,%ecx // #words + jz 3f +1: + subl $4,%esi + movl (%esi),%eax + subl $4,%edi + movl %eax,(%edi) + dec %ecx + jnz 1b +3: + andl $3,%edx // bytes? + jz 5f +4: + dec %esi + movb (%esi),%al + dec %edi + movb %al,(%edi) + dec %edx + jnz 4b +5: + movl 8(%ebp),%eax // get return value (dst ptr) for memcpy/memmove + popl %edi + popl %esi + popl %ebp + ret + +// Handle a reverse move long enough to justify using SSE. 
+// ecx = length +// esi = one byte past end of source +// edi = one byte past end of dest + +LReverseNotShort: + movl %edi,%edx // copy destination ptr (one byte past end of dest) + andl $15,%edx // get #bytes to align destination + je LReverseDestAligned // already aligned + subl %edx,%ecx // adjust length for the bytes copied below +1: // loop copying 1..15 bytes, moving backward + dec %esi + movb (%esi),%al + dec %edi + movb %al,(%edi) + dec %edx + jnz 1b + +// Destination is now aligned. Prepare for reverse loops. + +LReverseDestAligned: + movl %ecx,%edx // copy length + andl $63,%ecx // get remaining bytes (0..63) for LReverseShort + andl $-64,%edx // get number of bytes we will copy in inner loop + subl %edx,%esi // point to endpoint of copy (lowest address the inner loop reads) + subl %edx,%edi // likewise for dest (lowest address the inner loop writes) + testl $15,%esi // is source aligned too? + jnz LReverseUnalignedLoop // no + +LReverseAlignedLoop: // loop over 64-byte chunks (aligned loads and stores) + movdqa -16(%esi,%edx),%xmm0 + movdqa -32(%esi,%edx),%xmm1 + movdqa -48(%esi,%edx),%xmm2 + movdqa -64(%esi,%edx),%xmm3 + + movdqa %xmm0,-16(%edi,%edx) + movdqa %xmm1,-32(%edi,%edx) + movdqa %xmm2,-48(%edi,%edx) + movdqa %xmm3,-64(%edi,%edx) + + subl $64,%edx + jne LReverseAlignedLoop + + jmp LReverseShort // copy remaining 0..63 bytes and done + + +// Reverse, unaligned loop. LDDQU==MOVDQU on these machines. + +LReverseUnalignedLoop: // loop over 64-byte chunks (unaligned loads, aligned stores) + movdqu -16(%esi,%edx),%xmm0 + movdqu -32(%esi,%edx),%xmm1 + movdqu -48(%esi,%edx),%xmm2 + movdqu -64(%esi,%edx),%xmm3 + + movdqa %xmm0,-16(%edi,%edx) + movdqa %xmm1,-32(%edi,%edx) + movdqa %xmm2,-48(%edi,%edx) + movdqa %xmm3,-64(%edi,%edx) + + subl $64,%edx + jne LReverseUnalignedLoop + + jmp LReverseShort // copy remaining 0..63 bytes and done + + + COMMPAGE_DESCRIPTOR(bcopy_sse4,_COMM_PAGE_BCOPY,kHasSSE3+kHasSupplementalSSE3+kCache64,0) diff --git a/osfmk/i386/commpage/bcopy_sse4_64.s b/osfmk/i386/commpage/bcopy_sse4_64.s new file mode 100644 index 000000000..a055422ab --- /dev/null +++ b/osfmk/i386/commpage/bcopy_sse4_64.s @@ -0,0 +1,791 @@ +/* + * Copyright (c) 2006 Apple Computer, Inc. 
All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * The contents of this file constitute Original Code as defined in and + * are subject to the Apple Public Source License Version 1.1 (the + * "License"). You may not use this file except in compliance with the + * License. Please obtain a copy of the License at + * http://www.apple.com/publicsource and read it before using this file. + * + * This Original Code and all software distributed under the License are + * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the + * License for the specific language governing rights and limitations + * under the License. + * + * @APPLE_LICENSE_HEADER_END@ + */ + +#include +#include + +/* + * The bcopy/memcpy loops, tuned for 64-bit Pentium-M class processors with + * SSE4 and 64-byte cache lines. This is the 64-bit version. + * + * The following #defines are tightly coupled to the u-architecture: + */ + +#define kShort 80 // too short to bother with SSE (must be >=80) +#define kVeryLong (500*1024) // large enough for non-temporal stores (>=8192 and <2GB) +#define kFastUCode ((16*1024)-15) // cutoff for microcode fastpath for "rep/movsl" + + +// void bcopy(const void *src, void *dst, size_t len); + + .text + .code64 + .align 5, 0x90 +LZero: +Lbcopy_sse4_64: // void bcopy(const void *src, void *dst, size_t len) + pushq %rbp // set up a frame for backtraces + movq %rsp,%rbp + movq %rsi,%rax // copy dest ptr + movq %rdi,%rsi // exchange source and dest ptrs (bcopy takes src first) + movq %rax,%rdi + subq %rsi,%rax // (dest - source) + cmpq %rdx,%rax // must move in reverse if (dest - source) < length (unsigned compare) + jb LReverseIsland + cmpq $(kShort),%rdx // long enough to bother with SSE? 
+ jbe LShort // no + jmp LNotShort + +// +// void *memcpy(void *dst, const void *src, size_t len); +// void *memmove(void *dst, const void *src, size_t len); +// +// NB: These need to be 32 bytes from bcopy(): +// + + .align 5, 0x90 +Lmemcpy: // void *memcpy(void *dst, const void *src, size_t len) +Lmemmove: // void *memmove(void *dst, const void *src, size_t len) + pushq %rbp // set up a frame for backtraces + movq %rsp,%rbp + movq %rdi,%r11 // save return value (dst ptr) in %r11 + movq %rdi,%rax + subq %rsi,%rax // (dest - source) + cmpq %rdx,%rax // must move in reverse if (dest - source) < length (unsigned compare) + jb LReverseIsland + cmpq $(kShort),%rdx // long enough to bother with SSE? + ja LNotShort // yes + +// Handle short forward copies. As the most common case, this is the fall-through path. +// rdx = length (<= kShort) +// rsi = source ptr +// rdi = dest ptr + +LShort: + movl %edx,%ecx // copy length using 32-bit operation + shrl $2,%ecx // get #doublewords + jz LLeftovers +2: // loop copying doublewords + movl (%rsi),%eax + addq $4,%rsi + movl %eax,(%rdi) + addq $4,%rdi + decl %ecx + jnz 2b +LLeftovers: // handle leftover bytes (0..3) in last doubleword + andl $3,%edx // any leftover bytes? + jz 5f +4: // loop copying bytes + movb (%rsi),%al + incq %rsi + movb %al,(%rdi) + incq %rdi + decl %edx + jnz 4b +5: + movq %r11,%rax // get return value (dst ptr) for memcpy/memmove; bcopy returns void + popq %rbp + ret + + +LReverseIsland: // keep the "jb" above a short branch... + jmp LReverse // ...because reverse moves are uncommon + + +// Handle forward moves that are long enough to justify use of SSE. +// First, 16-byte align the destination. +// rdx = length (> kShort) +// rsi = source ptr +// rdi = dest ptr + +LNotShort: + cmpq $(kVeryLong),%rdx // long enough to justify heavyweight loops? 
+ jae LVeryLong // use very-long-operand path + movl %edi,%ecx // copy low half of destination ptr + negl %ecx + andl $15,%ecx // get #bytes to align destination + jz LDestAligned // already aligned + subl %ecx,%edx // decrement length by the alignment byte count + rep // align destination by copying %ecx (1..15) bytes + movsb + + +// Destination is now aligned. Dispatch to one of sixteen loops over 64-byte chunks, +// based on the alignment of the source. All vector loads and stores are aligned. +// Even though this means we have to shift and repack vectors, doing so is much faster +// than unaligned loads. Since kShort>=80 and we've moved at most 15 bytes already, +// there is at least one chunk. When we enter the copy loops, the following registers +// are set up: +// rdx = residual length (0..63) +// rcx = -(length to move), a multiple of 64 less than 2GB +// rsi = ptr to 1st source byte not to move (unaligned) +// rdi = ptr to 1st dest byte not to move (aligned) + +LDestAligned: + movl %edx,%ecx // copy length + movl %esi,%eax // copy low half of source address + andl $63,%edx // get remaining bytes for LShort + andl $15,%eax // mask to low 4 bits of source address (index into LTable) + andl $-64,%ecx // get number of bytes we will copy in inner loop +// We'd like to use lea with rip-relative addressing, but cannot in a .code64 block. 
+// lea LTable(%rip),%r8 // point to dispatch table + movq $(_COMM_PAGE_32_TO_64(_COMM_PAGE_BCOPY)),%r8 // work around 4586528: presumably the 64-bit commpage address of LZero + addq $(LTable-LZero),%r8 // work around 4586528: add LTable's offset from LZero to point at the table + addq %rcx,%rsi // point to 1st byte not copied + addq %rcx,%rdi + movl (%r8,%rax,4),%eax // get offset of copy loop (relative to LTable) + negq %rcx // now generate offset to 1st byte to be copied + addq %r8,%rax // generate address of copy loop + jmp *%rax // enter copy loop, selected by source alignment + + .align 2 +LTable: // table of copy loop offsets, each relative to LTable + .long (LMod0 - LTable) + .long (LMod1 - LTable) + .long (LMod2 - LTable) + .long (LMod3 - LTable) + .long (LMod4 - LTable) + .long (LMod5 - LTable) + .long (LMod6 - LTable) + .long (LMod7 - LTable) + .long (LMod8 - LTable) + .long (LMod9 - LTable) + .long (LMod10 - LTable) + .long (LMod11 - LTable) + .long (LMod12 - LTable) + .long (LMod13 - LTable) + .long (LMod14 - LTable) + .long (LMod15 - LTable) + + +// Very long forward moves. These are at least several pages. They are special cased +// and aggressively optimized, not so much because they are common or useful, but +// because they are subject to benchmark. There isn't enough room for them in the +// area reserved on the commpage for bcopy, so we put them elsewhere. We call +// the longcopy routine using the normal ABI: +// rdi = dest +// rsi = source +// rdx = length (>= kVeryLong bytes) + +LVeryLong: + pushq %r11 // save return value (dst ptr) across the call + movq $_COMM_PAGE_32_TO_64(_COMM_PAGE_LONGCOPY),%rax + call *%rax // call very long operand routine + popq %rax // pop return value into %rax + popq %rbp + ret + + +// On Pentium-M, the microcode for "rep/movsl" is faster than SSE for 16-byte +// aligned operands from about 32KB up to kVeryLong for the hot cache case, and from +// about 256 bytes up to kVeryLong for cold caches. 
This is because the microcode +// avoids having to read destination cache lines that will be completely overwritten. 
+// The cutoff we use (ie, kFastUCode) must somehow balance the two cases, since +// we do not know if the destination is in cache or not. + +Lfastpath: + addq %rcx,%rsi // restore ptrs to 1st byte of source and dest + addq %rcx,%rdi + negl %ecx // make length positive (known to be < 2GB) + orl %edx,%ecx // restore total #bytes remaining to move (chunk bytes | residual 0..63) + cld // we'll move forward + shrl $2,%ecx // compute #doublewords to move + rep // the u-code will optimize this + movsl + jmp LLeftovers // handle 0..3 leftover bytes + + +// Forward loop for medium length operands in which low four bits of %rsi == 0000 + +LMod0: + cmpl $(-kFastUCode),%ecx // %rcx == -length, where (length < kVeryLong) + jle Lfastpath // long enough for fastpath in microcode + jmp 1f + .align 4,0x90 // 16-byte align inner loops +1: // loop over 64-byte chunks + movdqa (%rsi,%rcx),%xmm0 + movdqa 16(%rsi,%rcx),%xmm1 + movdqa 32(%rsi,%rcx),%xmm2 + movdqa 48(%rsi,%rcx),%xmm3 + + movdqa %xmm0,(%rdi,%rcx) + movdqa %xmm1,16(%rdi,%rcx) + movdqa %xmm2,32(%rdi,%rcx) + movdqa %xmm3,48(%rdi,%rcx) + + addq $64,%rcx + jnz 1b + + jmp LShort // copy remaining 0..63 bytes and done + + +// Forward loop for medium length operands in which low four bits of %rsi == 0001 + +LMod1: + movdqa -1(%rsi,%rcx),%xmm0 // prime the loop by loading 1st source dq +1: // loop over 64-byte chunks + movdqa 15(%rsi,%rcx),%xmm1 + movdqa 31(%rsi,%rcx),%xmm2 + movdqa 47(%rsi,%rcx),%xmm3 + movdqa 63(%rsi,%rcx),%xmm4 + + movdqa %xmm0,%xmm5 // keep the previous dq for the first palignr merge + movdqa %xmm4,%xmm0 // carry the last dq loaded into the next iteration + + palignr $1,%xmm3,%xmm4 // dest <- shr( dest || source, imm*8 ) + palignr $1,%xmm2,%xmm3 + palignr $1,%xmm1,%xmm2 + palignr $1,%xmm5,%xmm1 + + movdqa %xmm1,(%rdi,%rcx) + movdqa %xmm2,16(%rdi,%rcx) + movdqa %xmm3,32(%rdi,%rcx) + movdqa %xmm4,48(%rdi,%rcx) + + addq $64,%rcx + jnz 1b + + jmp LShort // copy remaining 0..63 bytes and done + + +// Forward loop for medium length operands in which low four bits of %rsi == 0010 + +LMod2: + movdqa -2(%rsi,%rcx),%xmm0 // prime the loop by loading 1st 
source dq +1: // loop over 64-byte chunks + movdqa 14(%rsi,%rcx),%xmm1 + movdqa 30(%rsi,%rcx),%xmm2 + movdqa 46(%rsi,%rcx),%xmm3 + movdqa 62(%rsi,%rcx),%xmm4 + + movdqa %xmm0,%xmm5 // keep the previous dq for the first palignr merge + movdqa %xmm4,%xmm0 // carry the last dq loaded into the next iteration + + palignr $2,%xmm3,%xmm4 // dest <- shr( dest || source, imm*8 ) + palignr $2,%xmm2,%xmm3 + palignr $2,%xmm1,%xmm2 + palignr $2,%xmm5,%xmm1 + + movdqa %xmm1,(%rdi,%rcx) + movdqa %xmm2,16(%rdi,%rcx) + movdqa %xmm3,32(%rdi,%rcx) + movdqa %xmm4,48(%rdi,%rcx) + + addq $64,%rcx + jnz 1b + + jmp LShort // copy remaining 0..63 bytes and done + + +// Forward loop for medium length operands in which low four bits of %rsi == 0011 + +LMod3: + movdqa -3(%rsi,%rcx),%xmm0 // prime the loop by loading 1st source dq +1: // loop over 64-byte chunks + movdqa 13(%rsi,%rcx),%xmm1 + movdqa 29(%rsi,%rcx),%xmm2 + movdqa 45(%rsi,%rcx),%xmm3 + movdqa 61(%rsi,%rcx),%xmm4 + + movdqa %xmm0,%xmm5 // keep the previous dq for the first palignr merge + movdqa %xmm4,%xmm0 // carry the last dq loaded into the next iteration + + palignr $3,%xmm3,%xmm4 // dest <- shr( dest || source, imm*8 ) + palignr $3,%xmm2,%xmm3 + palignr $3,%xmm1,%xmm2 + palignr $3,%xmm5,%xmm1 + + movdqa %xmm1,(%rdi,%rcx) + movdqa %xmm2,16(%rdi,%rcx) + movdqa %xmm3,32(%rdi,%rcx) + movdqa %xmm4,48(%rdi,%rcx) + + addq $64,%rcx + jnz 1b + + jmp LShort // copy remaining 0..63 bytes and done + + +// Forward loop for medium length operands in which low four bits of %rsi == 0100 +// We use the float single data type in order to use "movss" to merge vectors. 
+ +LMod4: + movaps -4(%rsi,%rcx),%xmm0 // 4-byte aligned: prime the loop + jmp 1f + .align 4,0x90 +1: // loop over 64-byte chunks + movaps 12(%rsi,%rcx),%xmm1 + movaps 28(%rsi,%rcx),%xmm2 + movss %xmm1,%xmm0 // copy low 4 bytes of source into destination + pshufd $(0x39),%xmm0,%xmm0 // rotate right 4 bytes (mask -- 00 11 10 01) + movaps 44(%rsi,%rcx),%xmm3 + movss %xmm2,%xmm1 + pshufd $(0x39),%xmm1,%xmm1 + movaps 60(%rsi,%rcx),%xmm4 + movss %xmm3,%xmm2 + pshufd $(0x39),%xmm2,%xmm2 + + movaps %xmm0,(%rdi,%rcx) + movss %xmm4,%xmm3 + pshufd $(0x39),%xmm3,%xmm3 + movaps %xmm1,16(%rdi,%rcx) + movaps %xmm2,32(%rdi,%rcx) + movaps %xmm4,%xmm0 + movaps %xmm3,48(%rdi,%rcx) + + addq $64,%rcx + jnz 1b + + jmp LShort // copy remaining 0..63 bytes and done + + +// Forward loop for medium length operands in which low four bits of %rsi == 0101 + +LMod5: + movdqa -5(%rsi,%rcx),%xmm0 // prime the loop by loading 1st source dq +1: // loop over 64-byte chunks + movdqa 11(%rsi,%rcx),%xmm1 + movdqa 27(%rsi,%rcx),%xmm2 + movdqa 43(%rsi,%rcx),%xmm3 + movdqa 59(%rsi,%rcx),%xmm4 + + movdqa %xmm0,%xmm5 + movdqa %xmm4,%xmm0 + + palignr $5,%xmm3,%xmm4 // dest <- shr( dest || source, imm*8 ) + palignr $5,%xmm2,%xmm3 + palignr $5,%xmm1,%xmm2 + palignr $5,%xmm5,%xmm1 + + movdqa %xmm1,(%rdi,%rcx) + movdqa %xmm2,16(%rdi,%rcx) + movdqa %xmm3,32(%rdi,%rcx) + movdqa %xmm4,48(%rdi,%rcx) + + addq $64,%rcx + jnz 1b + + jmp LShort // copy remaining 0..63 bytes and done + + +// Forward loop for medium length operands in which low four bits of %rsi == 0110 + +LMod6: + movdqa -6(%rsi,%rcx),%xmm0 // prime the loop by loading 1st source dq +1: // loop over 64-byte chunks + movdqa 10(%rsi,%rcx),%xmm1 + movdqa 26(%rsi,%rcx),%xmm2 + movdqa 42(%rsi,%rcx),%xmm3 + movdqa 58(%rsi,%rcx),%xmm4 + + movdqa %xmm0,%xmm5 + movdqa %xmm4,%xmm0 + + palignr $6,%xmm3,%xmm4 // dest <- shr( dest || source, imm*8 ) + palignr $6,%xmm2,%xmm3 + palignr $6,%xmm1,%xmm2 + palignr $6,%xmm5,%xmm1 + + movdqa %xmm1,(%rdi,%rcx) + movdqa 
%xmm2,16(%rdi,%rcx) + movdqa %xmm3,32(%rdi,%rcx) + movdqa %xmm4,48(%rdi,%rcx) + + addq $64,%rcx + jnz 1b + + jmp LShort // copy remaining 0..63 bytes and done + + +// Forward loop for medium length operands in which low four bits of %rsi == 0111 + +LMod7: + movdqa -7(%rsi,%rcx),%xmm0 // prime the loop by loading 1st source dq +1: // loop over 64-byte chunks + movdqa 9(%rsi,%rcx),%xmm1 + movdqa 25(%rsi,%rcx),%xmm2 + movdqa 41(%rsi,%rcx),%xmm3 + movdqa 57(%rsi,%rcx),%xmm4 + + movdqa %xmm0,%xmm5 + movdqa %xmm4,%xmm0 + + palignr $7,%xmm3,%xmm4 // dest <- shr( dest || source, imm*8 ) + palignr $7,%xmm2,%xmm3 + palignr $7,%xmm1,%xmm2 + palignr $7,%xmm5,%xmm1 + + movdqa %xmm1,(%rdi,%rcx) + movdqa %xmm2,16(%rdi,%rcx) + movdqa %xmm3,32(%rdi,%rcx) + movdqa %xmm4,48(%rdi,%rcx) + + addq $64,%rcx + jnz 1b + + jmp LShort // copy remaining 0..63 bytes and done + + +// Forward loop for medium length operands in which low four bits of %rsi == 1000 +// We use the float double data type in order to use "shufpd" to shift by 8 bytes. 
+ +LMod8: + cmpl $(-kFastUCode),%ecx // %rcx == -length, where (length < kVeryLong) + jle Lfastpath // long enough for fastpath in microcode + movapd -8(%rsi,%rcx),%xmm0 // 8-byte aligned: prime the loop + jmp 1f + .align 4,0x90 +1: // loop over 64-byte chunks + movapd 8(%rsi,%rcx),%xmm1 + movapd 24(%rsi,%rcx),%xmm2 + shufpd $01,%xmm1,%xmm0 // %xmm0 <- shr( %xmm0 || %xmm1, 8 bytes) + movapd 40(%rsi,%rcx),%xmm3 + shufpd $01,%xmm2,%xmm1 + movapd 56(%rsi,%rcx),%xmm4 + shufpd $01,%xmm3,%xmm2 + + movapd %xmm0,(%rdi,%rcx) + shufpd $01,%xmm4,%xmm3 + movapd %xmm1,16(%rdi,%rcx) + movapd %xmm2,32(%rdi,%rcx) + movapd %xmm4,%xmm0 + movapd %xmm3,48(%rdi,%rcx) + + addq $64,%rcx + jnz 1b + + jmp LShort // copy remaining 0..63 bytes and done + + +// Forward loop for medium length operands in which low four bits of %rsi == 1001 + +LMod9: + movdqa -9(%rsi,%rcx),%xmm0 // prime the loop by loading 1st source dq +1: // loop over 64-byte chunks + movdqa 7(%rsi,%rcx),%xmm1 + movdqa 23(%rsi,%rcx),%xmm2 + movdqa 39(%rsi,%rcx),%xmm3 + movdqa 55(%rsi,%rcx),%xmm4 + + movdqa %xmm0,%xmm5 + movdqa %xmm4,%xmm0 + + palignr $9,%xmm3,%xmm4 // dest <- shr( dest || source, imm*8 ) + palignr $9,%xmm2,%xmm3 + palignr $9,%xmm1,%xmm2 + palignr $9,%xmm5,%xmm1 + + movdqa %xmm1,(%rdi,%rcx) + movdqa %xmm2,16(%rdi,%rcx) + movdqa %xmm3,32(%rdi,%rcx) + movdqa %xmm4,48(%rdi,%rcx) + + addq $64,%rcx + jnz 1b + + jmp LShort // copy remaining 0..63 bytes and done + + +// Forward loop for medium length operands in which low four bits of %rsi == 1010 + +LMod10: + movdqa -10(%rsi,%rcx),%xmm0 // prime the loop by loading 1st source dq +1: // loop over 64-byte chunks + movdqa 6(%rsi,%rcx),%xmm1 + movdqa 22(%rsi,%rcx),%xmm2 + movdqa 38(%rsi,%rcx),%xmm3 + movdqa 54(%rsi,%rcx),%xmm4 + + movdqa %xmm0,%xmm5 + movdqa %xmm4,%xmm0 + + palignr $10,%xmm3,%xmm4 // dest <- shr( dest || source, imm*8 ) + palignr $10,%xmm2,%xmm3 + palignr $10,%xmm1,%xmm2 + palignr $10,%xmm5,%xmm1 + + movdqa %xmm1,(%rdi,%rcx) + movdqa 
%xmm2,16(%rdi,%rcx) + movdqa %xmm3,32(%rdi,%rcx) + movdqa %xmm4,48(%rdi,%rcx) + + addq $64,%rcx + jnz 1b + + jmp LShort // copy remaining 0..63 bytes and done + + +// Forward loop for medium length operands in which low four bits of %rsi == 1011 + +LMod11: + movdqa -11(%rsi,%rcx),%xmm0 // prime the loop by loading 1st source dq +1: // loop over 64-byte chunks + movdqa 5(%rsi,%rcx),%xmm1 + movdqa 21(%rsi,%rcx),%xmm2 + movdqa 37(%rsi,%rcx),%xmm3 + movdqa 53(%rsi,%rcx),%xmm4 + + movdqa %xmm0,%xmm5 + movdqa %xmm4,%xmm0 + + palignr $11,%xmm3,%xmm4 // dest <- shr( dest || source, imm*8 ) + palignr $11,%xmm2,%xmm3 + palignr $11,%xmm1,%xmm2 + palignr $11,%xmm5,%xmm1 + + movdqa %xmm1,(%rdi,%rcx) + movdqa %xmm2,16(%rdi,%rcx) + movdqa %xmm3,32(%rdi,%rcx) + movdqa %xmm4,48(%rdi,%rcx) + + addq $64,%rcx + jnz 1b + + jmp LShort // copy remaining 0..63 bytes and done + + +// Forward loop for medium length operands in which low four bits of %rsi == 1100 +// We use the float single data type in order to use "movss" to merge vectors. 
+ +LMod12: + movss (%rsi,%rcx),%xmm0 // prefetch 1st four bytes of source, right justified + jmp 1f + .align 4,0x90 +1: // loop over 64-byte chunks + pshufd $(0x93),4(%rsi,%rcx),%xmm1 // load and rotate right 12 bytes (mask -- 10 01 00 11) + pshufd $(0x93),20(%rsi,%rcx),%xmm2 + pshufd $(0x93),36(%rsi,%rcx),%xmm3 + pshufd $(0x93),52(%rsi,%rcx),%xmm4 + + movaps %xmm4,%xmm5 + movss %xmm3,%xmm4 // copy low 4 bytes of source into destination + movss %xmm2,%xmm3 + movss %xmm1,%xmm2 + movss %xmm0,%xmm1 + + movaps %xmm1,(%rdi,%rcx) + movaps %xmm2,16(%rdi,%rcx) + movaps %xmm5,%xmm0 + movaps %xmm3,32(%rdi,%rcx) + movaps %xmm4,48(%rdi,%rcx) + + addq $64,%rcx + jnz 1b + + jmp LShort // copy remaining 0..63 bytes and done + + +// Forward loop for medium length operands in which low four bits of %rsi == 1101 + +LMod13: + movdqa -13(%rsi,%rcx),%xmm0 // prime the loop by loading 1st source dq +1: // loop over 64-byte chunks + movdqa 3(%rsi,%rcx),%xmm1 + movdqa 19(%rsi,%rcx),%xmm2 + movdqa 35(%rsi,%rcx),%xmm3 + movdqa 51(%rsi,%rcx),%xmm4 + + movdqa %xmm0,%xmm5 + movdqa %xmm4,%xmm0 + + palignr $13,%xmm3,%xmm4 // dest <- shr( dest || source, imm*8 ) + palignr $13,%xmm2,%xmm3 + palignr $13,%xmm1,%xmm2 + palignr $13,%xmm5,%xmm1 + + movdqa %xmm1,(%rdi,%rcx) + movdqa %xmm2,16(%rdi,%rcx) + movdqa %xmm3,32(%rdi,%rcx) + movdqa %xmm4,48(%rdi,%rcx) + + addq $64,%rcx + jnz 1b + + jmp LShort // copy remaining 0..63 bytes and done + + +// Forward loop for medium length operands in which low four bits of %rsi == 1110 + +LMod14: + movdqa -14(%rsi,%rcx),%xmm0 // prime the loop by loading 1st source dq +1: // loop over 64-byte chunks + movdqa 2(%rsi,%rcx),%xmm1 + movdqa 18(%rsi,%rcx),%xmm2 + movdqa 34(%rsi,%rcx),%xmm3 + movdqa 50(%rsi,%rcx),%xmm4 + + movdqa %xmm0,%xmm5 + movdqa %xmm4,%xmm0 + + palignr $14,%xmm3,%xmm4 // dest <- shr( dest || source, imm*8 ) + palignr $14,%xmm2,%xmm3 + palignr $14,%xmm1,%xmm2 + palignr $14,%xmm5,%xmm1 + + movdqa %xmm1,(%rdi,%rcx) + movdqa %xmm2,16(%rdi,%rcx) + movdqa 
%xmm3,32(%rdi,%rcx) + movdqa %xmm4,48(%rdi,%rcx) + + addq $64,%rcx + jnz 1b + + jmp LShort // copy remaining 0..63 bytes and done + + +// Forward loop for medium length operands in which low four bits of %rsi == 1111 + +LMod15: + movdqa -15(%rsi,%rcx),%xmm0 // prime the loop by loading 1st source dq +1: // loop over 64-byte chunks + movdqa 1(%rsi,%rcx),%xmm1 + movdqa 17(%rsi,%rcx),%xmm2 + movdqa 33(%rsi,%rcx),%xmm3 + movdqa 49(%rsi,%rcx),%xmm4 + + movdqa %xmm0,%xmm5 + movdqa %xmm4,%xmm0 + + palignr $15,%xmm3,%xmm4 // dest <- shr( dest || source, imm*8 ) + palignr $15,%xmm2,%xmm3 + palignr $15,%xmm1,%xmm2 + palignr $15,%xmm5,%xmm1 + + movdqa %xmm1,(%rdi,%rcx) + movdqa %xmm2,16(%rdi,%rcx) + movdqa %xmm3,32(%rdi,%rcx) + movdqa %xmm4,48(%rdi,%rcx) + + addq $64,%rcx + jnz 1b + + jmp LShort // copy remaining 0..63 bytes and done + + +// Reverse moves. These are not optimized as aggressively as their forward +// counterparts, as they are only used with destructive overlap. +// rdx = length +// rsi = source ptr +// rdi = dest ptr + +LReverse: + addq %rdx,%rsi // point to end of strings + addq %rdx,%rdi + cmpq $(kShort),%rdx // long enough to bother with SSE? + ja LReverseNotShort // yes + +// Handle reverse short copies. +// edx = length (<= kShort) +// rsi = one byte past end of source +// rdi = one byte past end of dest + +LReverseShort: + movl %edx,%ecx // copy length + shrl $3,%ecx // #quadwords + jz 3f +1: + subq $8,%rsi + movq (%rsi),%rax + subq $8,%rdi + movq %rax,(%rdi) + decl %ecx + jnz 1b +3: + andl $7,%edx // bytes? + jz 5f +4: + decq %rsi + movb (%rsi),%al + decq %rdi + movb %al,(%rdi) + decl %edx + jnz 4b +5: + movq %r11,%rax // get return value (dst ptr) for memcpy/memmove + popq %rbp + ret + +// Handle a reverse move long enough to justify using SSE. 
+// rdx = length (> kShort) +// rsi = one byte past end of source +// rdi = one byte past end of dest + +LReverseNotShort: + movl %edi,%ecx // copy low half of destination ptr (one byte past end of dest) + andl $15,%ecx // get #bytes to align destination + je LReverseDestAligned // already aligned + subq %rcx,%rdx // adjust length for the bytes copied below +1: // loop copying 1..15 bytes, moving backward + decq %rsi + movb (%rsi),%al + decq %rdi + movb %al,(%rdi) + decl %ecx + jnz 1b + +// Destination is now aligned. Prepare for reverse loops. Length was > kShort (80) and at most 15 bytes were just moved, so at least one 64-byte chunk remains. + +LReverseDestAligned: + movq %rdx,%rcx // copy length + andl $63,%edx // get remaining bytes (0..63) for LReverseShort + andq $-64,%rcx // get number of bytes we will copy in inner loop + subq %rcx,%rsi // point to endpoint of copy (lowest address the inner loop reads) + subq %rcx,%rdi // likewise for dest (lowest address the inner loop writes) + testl $15,%esi // is source aligned too? + jnz LReverseUnalignedLoop // no + +LReverseAlignedLoop: // loop over 64-byte chunks (aligned loads and stores) + movdqa -16(%rsi,%rcx),%xmm0 + movdqa -32(%rsi,%rcx),%xmm1 + movdqa -48(%rsi,%rcx),%xmm2 + movdqa -64(%rsi,%rcx),%xmm3 + + movdqa %xmm0,-16(%rdi,%rcx) + movdqa %xmm1,-32(%rdi,%rcx) + movdqa %xmm2,-48(%rdi,%rcx) + movdqa %xmm3,-64(%rdi,%rcx) + + subq $64,%rcx + jne LReverseAlignedLoop + + jmp LReverseShort // copy remaining 0..63 bytes and done + + +// Reverse, unaligned loop. LDDQU==MOVDQU on these machines. 
+ +LReverseUnalignedLoop: // loop over 64-byte chunks + movdqu -16(%rsi,%rcx),%xmm0 + movdqu -32(%rsi,%rcx),%xmm1 + movdqu -48(%rsi,%rcx),%xmm2 + movdqu -64(%rsi,%rcx),%xmm3 + + movdqa %xmm0,-16(%rdi,%rcx) + movdqa %xmm1,-32(%rdi,%rcx) + movdqa %xmm2,-48(%rdi,%rcx) + movdqa %xmm3,-64(%rdi,%rcx) + + subq $64,%rcx + jne LReverseUnalignedLoop + + jmp LReverseShort // copy remaining 0..63 bytes and done + + + COMMPAGE_DESCRIPTOR(bcopy_sse4_64,_COMM_PAGE_BCOPY,kHasSSE3+kHasSupplementalSSE3+kCache64,0) diff --git a/osfmk/i386/commpage/bzero_scalar.s b/osfmk/i386/commpage/bzero_scalar.s index 33987a77f..9e3195681 100644 --- a/osfmk/i386/commpage/bzero_scalar.s +++ b/osfmk/i386/commpage/bzero_scalar.s @@ -1,5 +1,5 @@ /* - * Copyright (c) 2003 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2003-2005 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -65,10 +65,12 @@ .text .align 5, 0x90 Lbzero_scalar: + pushl %ebp /* set up a frame for backtraces */ + movl %esp,%ebp pushl %edi pushl %ebx - movl 12(%esp),%edi - movl 16(%esp),%ecx + movl 8(%ebp),%edi + movl 12(%ebp),%ecx cld /* set fill direction forward */ xorl %eax,%eax /* set fill data to 0 */ @@ -79,7 +81,7 @@ Lbzero_scalar: * unaligned set. */ cmpl $0x0f,%ecx - jle L1 + jbe L1 movl %edi,%edx /* compute misalignment */ negl %edx @@ -103,6 +105,7 @@ L1: rep popl %ebx popl %edi + popl %ebp ret - COMMPAGE_DESCRIPTOR(bzero_scalar,_COMM_PAGE_BZERO,0,0) + COMMPAGE_DESCRIPTOR(bzero_scalar,_COMM_PAGE_BZERO,0,kHasSSE2) diff --git a/osfmk/i386/commpage/bzero_sse3.s b/osfmk/i386/commpage/bzero_sse3.s new file mode 100644 index 000000000..a7a6963eb --- /dev/null +++ b/osfmk/i386/commpage/bzero_sse3.s @@ -0,0 +1,158 @@ +/* + * Copyright (c) 2005 Apple Computer, Inc. All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * The contents of this file constitute Original Code as defined in and + * are subject to the Apple Public Source License Version 1.1 (the + * "License"). 
You may not use this file except in compliance with the + * License. Please obtain a copy of the License at + * http://www.apple.com/publicsource and read it before using this file. + * + * This Original Code and all software distributed under the License are + * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the + * License for the specific language governing rights and limitations + * under the License. + * + * @APPLE_LICENSE_HEADER_END@ + */ + +#include +#include + +/* + * Bzero, tuned for Pentium-M class processors with SSE3 + * and 64-byte cache lines. + * + * This routine is also used for memset(p,0,n), which is a common case + * since gcc sometimes silently maps bzero() into memset(). As a result, + * we always load the original ptr into %eax before returning. + */ + +#define kShort 80 // too short to bother with SSE (must be >=80) +#define kVeryLong (1024*1024) + + + .text + .align 5, 0x90 +Lbzero_sse3: // void bzero(void *b, size_t len); + pushl %ebp // set up a frame for backtraces + movl %esp,%ebp + pushl %edi + movl 8(%ebp),%edi // get ptr + movl 12(%ebp),%edx // get length + + xorl %eax,%eax // set fill data to 0 + cmpl $(kShort),%edx // long enough for SSE? + ja LNotShort // yes (unsigned compare: len is a size_t, so >=2GB must not look negative) + +// Here for short operands or the end of long ones. +// %edx = length +// %edi = ptr +// %eax = zero + +Lshort: + cmpl $16,%edx // long enough to word align? + jge 3f // yes + test %edx,%edx // length==0? + jz 6f +1: + movb %al,(%edi) // zero a byte + inc %edi + dec %edx + jnz 1b + jmp 6f +2: + movb %al,(%edi) // zero a byte + inc %edi + dec %edx +3: + test $3,%edi // is ptr doubleword aligned? 
+ jnz 2b // no + movl %edx,%ecx // copy length + shrl $2,%edx // #doublewords to store +4: + movl %eax,(%edi) // zero an aligned doubleword + addl $4,%edi + dec %edx + jnz 4b + andl $3,%ecx // mask down to #bytes at end (0..3) + jz 6f // none +5: + movb %al,(%edi) // zero a byte + inc %edi + dec %ecx + jnz 5b +6: + movl 8(%ebp),%eax // get return value in case this was a call of memset() + popl %edi + popl %ebp + ret + + +// We will be using SSE, so align ptr. + +LNotShort: + movl %edi,%ecx + negl %ecx + andl $15,%ecx // mask down to #bytes to 16-byte align + jz LDestAligned // already aligned + subl %ecx,%edx // decrement length +0: // loop storing bytes to align the ptr + movb %al,(%edi) // pack in a byte + inc %edi + dec %ecx + jnz 0b + +// Destination is now 16-byte aligned. Prepare to loop over 64-byte chunks. +// %edx = length +// %edi = ptr +// %eax = zero + +LDestAligned: + movl %edx,%ecx + andl $63,%edx // mask down to residual length (0..63) + andl $-64,%ecx // get #bytes we will zero in this loop + pxor %xmm0,%xmm0 // zero an SSE register + addl %ecx,%edi // increment ptr by length to move + cmpl $(kVeryLong),%ecx // long enough to justify non-temporal stores? + jae LVeryLong // yes + negl %ecx // negate length to move + jmp 1f + +// Loop over 64-byte chunks, storing into cache. + + .align 4,0x90 // keep inner loops 16-byte aligned +1: + movdqa %xmm0,(%edi,%ecx) + movdqa %xmm0,16(%edi,%ecx) + movdqa %xmm0,32(%edi,%ecx) + movdqa %xmm0,48(%edi,%ecx) + addl $64,%ecx + jne 1b + + jmp Lshort + +// Very long operands: use non-temporal stores to bypass cache. 
+ +LVeryLong: + negl %ecx // negate length to move + jmp 1f + + .align 4,0x90 // keep inner loops 16-byte aligned +1: + movntdq %xmm0,(%edi,%ecx) + movntdq %xmm0,16(%edi,%ecx) + movntdq %xmm0,32(%edi,%ecx) + movntdq %xmm0,48(%edi,%ecx) + addl $64,%ecx + jne 1b + + sfence // required by non-temporal stores + jmp Lshort + + + COMMPAGE_DESCRIPTOR(bzero_sse3,_COMM_PAGE_BZERO,kHasSSE2,0) diff --git a/osfmk/i386/commpage/bzero_sse3_64.s b/osfmk/i386/commpage/bzero_sse3_64.s new file mode 100644 index 000000000..98d012f3b --- /dev/null +++ b/osfmk/i386/commpage/bzero_sse3_64.s @@ -0,0 +1,158 @@ +/* + * Copyright (c) 2006 Apple Computer, Inc. All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * The contents of this file constitute Original Code as defined in and + * are subject to the Apple Public Source License Version 1.1 (the + * "License"). You may not use this file except in compliance with the + * License. Please obtain a copy of the License at + * http://www.apple.com/publicsource and read it before using this file. + * + * This Original Code and all software distributed under the License are + * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the + * License for the specific language governing rights and limitations + * under the License. + * + * @APPLE_LICENSE_HEADER_END@ + */ + +#include +#include + +/* + * Bzero, tuned for Pentium-M class processors with SSE3 + * and 64-byte cache lines. This is the 64-bit version. + * + * This routine is also used for memset(p,0,n), which is a common case + * since gcc sometimes silently maps bzero() into memset(). As a result, + * we always load the original ptr into %eax before returning. 
+ */ + +#define kShort 80 // too short to bother with SSE (must be >=80) +#define kVeryLong (1024*1024) + + + .text + .code64 + .align 5, 0x90 +Lbzero_sse3_64: // void bzero(void *b, size_t len); + pushq %rbp // set up a frame for backtraces + movq %rsp,%rbp + xorl %eax,%eax // set fill data to 0 + movq %rdi,%r11 // save original ptr as return value + cmpq $(kShort),%rsi // long enough for SSE? + ja LNotShort // yes (unsigned compare: Lshort only looks at %esi, so every length > kShort must leave here) + +// Here for short operands or the end of long ones. +// %esi = length (<= kShort) +// %rdi = ptr +// %eax = zero + +Lshort: + cmpl $16,%esi // long enough to word align? + jge 3f // yes + test %esi,%esi // length==0? + jz 6f +1: + movb %al,(%rdi) // zero a byte + incq %rdi + decl %esi + jnz 1b + jmp 6f +2: + movb %al,(%rdi) // zero a byte + incq %rdi + decl %esi +3: + testl $3,%edi // is ptr doubleword aligned? + jnz 2b // no + movl %esi,%ecx // copy length + shrl $2,%esi // #doublewords to store +4: + movl %eax,(%rdi) // zero an aligned doubleword + addq $4,%rdi + decl %esi + jnz 4b + andl $3,%ecx // mask down to #bytes at end (0..3) + jz 6f // none +5: + movb %al,(%rdi) // zero a byte + incq %rdi + decl %ecx + jnz 5b +6: + movq %r11,%rax // set return value in case this was a call of memset() + popq %rbp + ret + + +// We will be using SSE, so align ptr. +// %rsi = length (> kShort) +// %rdi = ptr +// %eax = zero + +LNotShort: + movl %edi,%ecx // get #bytes to 16-byte align ptr + negl %ecx + andl $15,%ecx + jz LDestAligned // already aligned + subq %rcx,%rsi // decrement length +0: // loop storing bytes to align the ptr + movb %al,(%rdi) // pack in a byte + incq %rdi + decl %ecx + jnz 0b + +// Destination is now 16-byte aligned. Prepare to loop over 64-byte chunks. 
+// %rsi = length (> (kShort-15)) +// %rdi = ptr (aligned) +// %eax = zero + +LDestAligned: + movq %rsi,%rcx + andl $63,%esi // mask down to residual length (0..63) + andq $-64,%rcx // get #bytes we will zero in this loop + pxor %xmm0,%xmm0 // zero an SSE register + addq %rcx,%rdi // increment ptr by length to move + cmpq $(kVeryLong),%rcx // long enough to justify non-temporal stores? + jae LVeryLong // yes + negq %rcx // negate length to move + jmp 1f + +// Loop over 64-byte chunks, storing into cache. + + .align 4,0x90 // keep inner loops 16-byte aligned +1: + movdqa %xmm0,(%rdi,%rcx) + movdqa %xmm0,16(%rdi,%rcx) + movdqa %xmm0,32(%rdi,%rcx) + movdqa %xmm0,48(%rdi,%rcx) + addq $64,%rcx + jne 1b + + jmp Lshort + +// Very long operands: use non-temporal stores to bypass cache. + +LVeryLong: + negq %rcx // negate length to move + jmp 1f + + .align 4,0x90 // keep inner loops 16-byte aligned +1: + movntdq %xmm0,(%rdi,%rcx) + movntdq %xmm0,16(%rdi,%rcx) + movntdq %xmm0,32(%rdi,%rcx) + movntdq %xmm0,48(%rdi,%rcx) + addq $64,%rcx + jne 1b + + sfence // required by non-temporal stores + jmp Lshort + + + COMMPAGE_DESCRIPTOR(bzero_sse3_64,_COMM_PAGE_BZERO,kHasSSE3,0) diff --git a/osfmk/i386/commpage/cacheflush.s b/osfmk/i386/commpage/cacheflush.s index 078494ec6..876597fac 100644 --- a/osfmk/i386/commpage/cacheflush.s +++ b/osfmk/i386/commpage/cacheflush.s @@ -1,5 +1,5 @@ /* - * Copyright (c) 2003 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2003-2006 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -20,19 +20,59 @@ * @APPLE_LICENSE_HEADER_END@ */ -#include #include #include .text .align 2, 0x90 +// void sysFlushDcache( void *p, size_t len ); +// 32-bit version + Lsys_flush_dcache: + movl 8(%esp),%ecx // get length (2nd stack arg; no frame, so 0(%esp) is the return address) + movl 4(%esp),%edx // get ptr (1st stack arg) + testl %ecx,%ecx // length 0? 
+ jz 2f // yes + mfence // ensure previous stores make it to memory +1: + clflush (%edx) // flush a line + addl $64,%edx + subl $64,%ecx + jnc 1b + mfence // make sure memory is updated before we return +2: + ret + + COMMPAGE_DESCRIPTOR(sys_flush_dcache,_COMM_PAGE_FLUSH_DCACHE,kCache64,0) + + +// void sysFlushDcache( void *p, size_t len ); +// 64-bit version + .code64 +Lsys_flush_dcache_64: // %rdi = ptr, %rsi = length + testq %rsi,%rsi // length 0? + jz 2f // yes + mfence // ensure previous stores make it to memory +1: + clflush (%rdi) // flush a line + addq $64,%rdi + subq $64,%rsi + jnc 1b + mfence // make sure memory is updated before we return +2: ret + .code32 + COMMPAGE_DESCRIPTOR(sys_flush_dcache_64,_COMM_PAGE_FLUSH_DCACHE,kCache64,0) + - COMMPAGE_DESCRIPTOR(sys_flush_dcache,_COMM_PAGE_FLUSH_DCACHE,0,0) +// void sysIcacheInvalidate( void *p, size_t len ); Lsys_icache_invalidate: + // This is a NOP on intel processors, since the intent of the API + // is to make data executable, and Intel L1Is are coherent with L1D. + // We can use same routine both in 32 and 64-bit mode, since it is + // just a RET instruction. ret COMMPAGE_DESCRIPTOR(sys_icache_invalidate,_COMM_PAGE_FLUSH_ICACHE,0,0) diff --git a/osfmk/i386/commpage/commpage.c b/osfmk/i386/commpage/commpage.c index 6a729039d..5e347ac29 100644 --- a/osfmk/i386/commpage/commpage.c +++ b/osfmk/i386/commpage/commpage.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2003 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2003-2006 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -23,18 +23,18 @@ /* * Here's what to do if you want to add a new routine to the comm page: * - * 1. Add a definition for it's address in osfmk/ppc/cpu_capabilities.h, + * 1. Add a definition for it's address in osfmk/i386/cpu_capabilities.h, * being careful to reserve room for future expansion. * * 2. Write one or more versions of the routine, each with it's own * commpage_descriptor. 
The tricky part is getting the "special", * "musthave", and "canthave" fields right, so that exactly one * version of the routine is selected for every machine. - * The source files should be in osfmk/ppc/commpage/. + * The source files should be in osfmk/i386/commpage/. * * 3. Add a ptr to your new commpage_descriptor(s) in the "routines" - * array in commpage_populate(). Of course, you'll also have to - * declare them "extern" in commpage_populate(). + * array in osfmk/i386/commpage/commpage_asm.s. There are two + * arrays, one for the 32-bit and one for the 64-bit commpage. * * 4. Write the code in Libc to use the new routine. */ @@ -43,6 +43,7 @@ #include #include #include +#include #include #include #include @@ -50,16 +51,34 @@ #include #include +#include -extern vm_map_t com_region_map32; // the shared submap, set up in vm init +/* the lists of commpage routines are in commpage_asm.s */ +extern commpage_descriptor* commpage_32_routines[]; +extern commpage_descriptor* commpage_64_routines[]; -static uintptr_t next = 0; // next available byte in comm page -static int cur_routine = 0; // comm page address of "current" routine -static int matched; // true if we've found a match for "current" routine +/* translated commpage descriptors from commpage_sigs.c */ +extern commpage_descriptor sigdata_descriptor; +extern commpage_descriptor *ba_descriptors[]; + +extern vm_map_t com_region_map32; // the shared submap, set up in vm init +extern vm_map_t com_region_map64; // the shared submap, set up in vm init +char *commPagePtr32 = NULL; // virtual addr in kernel map of 32-bit commpage +char *commPagePtr64 = NULL; // ...and of 64-bit commpage int _cpu_capabilities = 0; // define the capability vector -char *commPagePtr = NULL; // virtual address of comm page in kernel map +int noVMX = 0; /* if true, do not set kHasAltivec in ppc _cpu_capabilities */ + +void* dsmos_blobs[3]; /* ptrs to the system integrity data in each commpage */ +int dsmos_blob_count = 0; + +static 
uintptr_t next; // next available byte in comm page +static int cur_routine; // comm page address of "current" routine +static int matched; // true if we've found a match for "current" routine + +static char *commPagePtr; // virtual addr in kernel map of commpage we are working on +static size_t commPageBaseOffset; // add to 32-bit runtime address to get offset in commpage /* Allocate the commpage and add to the shared submap created by vm: * 1. allocate a page in the kernel map (RW) @@ -69,59 +88,61 @@ char *commPagePtr = NULL; // virtual address of comm page in kerne */ static void* -commpage_allocate( void ) +commpage_allocate( + vm_map_t submap, // com_region_map32 or com_region_map64 + size_t area_used ) // _COMM_PAGE32_AREA_USED or _COMM_PAGE64_AREA_USED { - vm_offset_t kernel_addr; // address of commpage in kernel map - vm_offset_t zero = 0; - vm_size_t size = _COMM_PAGE_AREA_LENGTH; - vm_map_entry_t entry; - ipc_port_t handle; - - if (com_region_map32 == NULL) - panic("commpage map is null"); - - if (vm_allocate(kernel_map,&kernel_addr,_COMM_PAGE_AREA_LENGTH,VM_FLAGS_ANYWHERE)) - panic("cannot allocate commpage"); - - if (vm_map_wire(kernel_map,kernel_addr,kernel_addr+_COMM_PAGE_AREA_LENGTH,VM_PROT_DEFAULT,FALSE)) - panic("cannot wire commpage"); - - /* - * Now that the object is created and wired into the kernel map, mark it so that no delay - * copy-on-write will ever be performed on it as a result of mapping it into user-space. - * If such a delayed copy ever occurred, we could remove the kernel's wired mapping - and - * that would be a real disaster. - * - * JMM - What we really need is a way to create it like this in the first place. 
- */ - if (!vm_map_lookup_entry( kernel_map, vm_map_trunc_page(kernel_addr), &entry) || entry->is_sub_map) - panic("cannot find commpage entry"); - entry->object.vm_object->copy_strategy = MEMORY_OBJECT_COPY_NONE; - - if (mach_make_memory_entry( kernel_map, // target map - &size, // size - kernel_addr, // offset (address in kernel map) - VM_PROT_DEFAULT, // map it RW - &handle, // this is the object handle we get - NULL )) // parent_entry (what is this?) - panic("cannot make entry for commpage"); - - if (vm_map_64( com_region_map32, // target map (shared submap) - &zero, // address (map into 1st page in submap) - _COMM_PAGE_AREA_LENGTH, // size - 0, // mask - VM_FLAGS_FIXED, // flags (it must be 1st page in submap) - handle, // port is the memory entry we just made - 0, // offset (map 1st page in memory entry) - FALSE, // copy - VM_PROT_READ, // cur_protection (R-only in user map) - VM_PROT_READ, // max_protection - VM_INHERIT_SHARE )) // inheritance - panic("cannot map commpage"); - - ipc_port_release(handle); - - return (void*) kernel_addr; // return address in kernel map + vm_offset_t kernel_addr; // address of commpage in kernel map + vm_offset_t zero = 0; + vm_size_t size = area_used; // size actually populated + vm_map_entry_t entry; + ipc_port_t handle; + + if (submap == NULL) + panic("commpage submap is null"); + + if (vm_allocate(kernel_map,&kernel_addr,area_used,VM_FLAGS_ANYWHERE)) + panic("cannot allocate commpage"); + + if (vm_map_wire(kernel_map,kernel_addr,kernel_addr+area_used,VM_PROT_DEFAULT,FALSE)) + panic("cannot wire commpage"); + + /* + * Now that the object is created and wired into the kernel map, mark it so that no delay + * copy-on-write will ever be performed on it as a result of mapping it into user-space. + * If such a delayed copy ever occurred, we could remove the kernel's wired mapping - and + * that would be a real disaster. + * + * JMM - What we really need is a way to create it like this in the first place. 
+ */ + if (!vm_map_lookup_entry( kernel_map, vm_map_trunc_page(kernel_addr), &entry) || entry->is_sub_map) + panic("cannot find commpage entry"); + entry->object.vm_object->copy_strategy = MEMORY_OBJECT_COPY_NONE; + + if (mach_make_memory_entry( kernel_map, // target map + &size, // size + kernel_addr, // offset (address in kernel map) + VM_PROT_DEFAULT, // map it RW + &handle, // this is the object handle we get + NULL )) // parent_entry (what is this?) + panic("cannot make entry for commpage"); + + if (vm_map_64( submap, // target map (shared submap) + &zero, // address (map into 1st page in submap) + area_used, // size + 0, // mask + VM_FLAGS_FIXED, // flags (it must be 1st page in submap) + handle, // port is the memory entry we just made + 0, // offset (map 1st page in memory entry) + FALSE, // copy + VM_PROT_READ, // cur_protection (R-only in user map) + VM_PROT_READ, // max_protection + VM_INHERIT_SHARE )) // inheritance + panic("cannot map commpage"); + + ipc_port_release(handle); + + return (void*) kernel_addr; // return address in kernel map } /* Get address (in kernel map) of a commpage field. */ @@ -130,7 +151,7 @@ static void* commpage_addr_of( int addr_at_runtime ) { - return (void*) ((uintptr_t)commPagePtr + addr_at_runtime - _COMM_PAGE_BASE_ADDRESS); + return (void*) ((uintptr_t)commPagePtr + addr_at_runtime - commPageBaseOffset); } /* Determine number of CPUs on this system. 
We cannot rely on @@ -164,6 +185,9 @@ commpage_init_cpu_capabilities( void ) ml_cpu_get_info(&cpu_info); switch (cpu_info.vector_unit) { + case 6: + bits |= kHasSupplementalSSE3; + /* fall thru */ case 5: bits |= kHasSSE3; /* fall thru */ @@ -200,15 +224,24 @@ commpage_init_cpu_capabilities( void ) bits |= kFastThreadLocalStorage; // we use %gs for TLS + if (cpu_mode_is64bit()) // k64Bit means processor is 64-bit capable + bits |= k64Bit; + _cpu_capabilities = bits; // set kernel version for use by drivers etc } +int +_get_cpu_capabilities() +{ + return _cpu_capabilities; +} + /* Copy data into commpage. */ static void commpage_stuff( int address, - void *source, + const void *source, int length ) { void *dest = commpage_addr_of(address); @@ -221,13 +254,38 @@ commpage_stuff( next = ((uintptr_t)dest + length); } +static void +commpage_stuff_swap( + int address, + void *source, + int length, + int legacy ) +{ + if ( legacy ) { + void *dest = commpage_addr_of(address); + dest = (void *)((uintptr_t) dest + _COMM_PAGE_SIGS_OFFSET); + switch (length) { + case 2: + OSWriteSwapInt16(dest, 0, *(uint16_t *)source); + break; + case 4: + OSWriteSwapInt32(dest, 0, *(uint32_t *)source); + break; + case 8: + OSWriteSwapInt64(dest, 0, *(uint64_t *)source); + break; + } + } +} static void commpage_stuff2( - int address, - void *source, - int length ) + int address, + void *source, + int length, + int legacy ) { + commpage_stuff_swap(address, source, length, legacy); commpage_stuff(address, source, length); } @@ -241,7 +299,7 @@ commpage_stuff_routine( if (rd->commpage_address != cur_routine) { if ((cur_routine!=0) && (matched==0)) - panic("commpage no match"); + panic("commpage no match for last, next address %08x", rd->commpage_address); cur_routine = rd->commpage_address; matched = 0; } @@ -251,100 +309,60 @@ commpage_stuff_routine( if ((must == rd->musthave) && (cant == 0)) { if (matched) - panic("commpage duplicate matches"); + panic("commpage multiple matches for address 
%08x", rd->commpage_address); matched = 1; commpage_stuff(rd->commpage_address,rd->code_address,rd->code_length); } } - -#define COMMPAGE_DESC(name) commpage_ ## name -#define EXTERN_COMMPAGE_DESC(name) \ - extern commpage_descriptor COMMPAGE_DESC(name) - -EXTERN_COMMPAGE_DESC(compare_and_swap32_mp); -EXTERN_COMMPAGE_DESC(compare_and_swap32_up); -EXTERN_COMMPAGE_DESC(compare_and_swap64_mp); -EXTERN_COMMPAGE_DESC(compare_and_swap64_up); -EXTERN_COMMPAGE_DESC(atomic_add32_mp); -EXTERN_COMMPAGE_DESC(atomic_add32_up); -EXTERN_COMMPAGE_DESC(mach_absolute_time); -EXTERN_COMMPAGE_DESC(spin_lock_try_mp); -EXTERN_COMMPAGE_DESC(spin_lock_try_up); -EXTERN_COMMPAGE_DESC(spin_lock_mp); -EXTERN_COMMPAGE_DESC(spin_lock_up); -EXTERN_COMMPAGE_DESC(spin_unlock); -EXTERN_COMMPAGE_DESC(pthread_getspecific); -EXTERN_COMMPAGE_DESC(gettimeofday); -EXTERN_COMMPAGE_DESC(sys_flush_dcache); -EXTERN_COMMPAGE_DESC(sys_icache_invalidate); -EXTERN_COMMPAGE_DESC(pthread_self); -EXTERN_COMMPAGE_DESC(relinquish); -EXTERN_COMMPAGE_DESC(bit_test_and_set_mp); -EXTERN_COMMPAGE_DESC(bit_test_and_set_up); -EXTERN_COMMPAGE_DESC(bit_test_and_clear_mp); -EXTERN_COMMPAGE_DESC(bit_test_and_clear_up); -EXTERN_COMMPAGE_DESC(bzero_scalar); -EXTERN_COMMPAGE_DESC(bcopy_scalar); -EXTERN_COMMPAGE_DESC(nanotime); - -static commpage_descriptor *routines[] = { - &COMMPAGE_DESC(compare_and_swap32_mp), - &COMMPAGE_DESC(compare_and_swap32_up), - &COMMPAGE_DESC(compare_and_swap64_mp), - &COMMPAGE_DESC(compare_and_swap64_up), - &COMMPAGE_DESC(atomic_add32_mp), - &COMMPAGE_DESC(atomic_add32_up), - &COMMPAGE_DESC(mach_absolute_time), - &COMMPAGE_DESC(spin_lock_try_mp), - &COMMPAGE_DESC(spin_lock_try_up), - &COMMPAGE_DESC(spin_lock_mp), - &COMMPAGE_DESC(spin_lock_up), - &COMMPAGE_DESC(spin_unlock), - &COMMPAGE_DESC(pthread_getspecific), - &COMMPAGE_DESC(gettimeofday), - &COMMPAGE_DESC(sys_flush_dcache), - &COMMPAGE_DESC(sys_icache_invalidate), - &COMMPAGE_DESC(pthread_self), - &COMMPAGE_DESC(relinquish), - 
&COMMPAGE_DESC(bit_test_and_set_mp), - &COMMPAGE_DESC(bit_test_and_set_up), - &COMMPAGE_DESC(bit_test_and_clear_mp), - &COMMPAGE_DESC(bit_test_and_clear_up), - &COMMPAGE_DESC(bzero_scalar), - &COMMPAGE_DESC(bcopy_scalar), - &COMMPAGE_DESC(nanotime), - NULL -}; - - -/* Fill in commpage: called once, during kernel initialization, from the - * startup thread before user-mode code is running. - * See the top of this file for a list of what you have to do to add - * a new routine to the commpage. +/* Fill in the 32- or 64-bit commpage. Called once for each. + * The 32-bit ("legacy") commpage has a bunch of stuff added to it + * for translated processes, some of which is byte-swapped. */ -void -commpage_populate( void ) +static void +commpage_populate_one( + vm_map_t submap, // com_region_map32 or com_region_map64 + char ** kernAddressPtr, // &commPagePtr32 or &commPagePtr64 + size_t area_used, // _COMM_PAGE32_AREA_USED or _COMM_PAGE64_AREA_USED + size_t base_offset, // will become commPageBaseOffset + commpage_descriptor** commpage_routines, // list of routine ptrs for this commpage + boolean_t legacy, // true if 32-bit commpage + const char* signature ) // "commpage 32-bit" or "commpage 64-bit" { short c2; static double two52 = 1048576.0 * 1048576.0 * 4096.0; // 2**52 static double ten6 = 1000000.0; // 10**6 commpage_descriptor **rd; short version = _COMM_PAGE_THIS_VERSION; + int swapcaps; - commPagePtr = (char *)commpage_allocate(); - - commpage_init_cpu_capabilities(); + next = (uintptr_t) NULL; + cur_routine = 0; + commPagePtr = (char *)commpage_allocate( submap, (vm_size_t) area_used ); + *kernAddressPtr = commPagePtr; // save address either in commPagePtr32 or 64 + commPageBaseOffset = base_offset; /* Stuff in the constants. We move things into the comm page in strictly * ascending order, so we can check for overlap and panic if so. 
*/ - - commpage_stuff2(_COMM_PAGE_VERSION,&version,sizeof(short)); - commpage_stuff(_COMM_PAGE_CPU_CAPABILITIES,&_cpu_capabilities, - sizeof(int)); + commpage_stuff(_COMM_PAGE_SIGNATURE,signature,strlen(signature)); + commpage_stuff2(_COMM_PAGE_VERSION,&version,sizeof(short),legacy); + commpage_stuff(_COMM_PAGE_CPU_CAPABILITIES,&_cpu_capabilities,sizeof(int)); + + /* excuse our magic constants, we cannot include ppc/cpu_capabilities.h */ + /* always set kCache32 and kDcbaAvailable */ + swapcaps = 0x44; + if ( _cpu_capabilities & kUP ) + swapcaps |= (kUP + (1 << kNumCPUsShift)); + else + swapcaps |= 2 << kNumCPUsShift; /* limit #cpus to 2 */ + if ( ! noVMX ) /* if rosetta will be emulating altivec... */ + swapcaps |= 0x101; /* ...then set kHasAltivec and kDataStreamsAvailable too */ + commpage_stuff_swap(_COMM_PAGE_CPU_CAPABILITIES, &swapcaps, sizeof(int), legacy); + c2 = 32; + commpage_stuff_swap(_COMM_PAGE_CACHE_LINESIZE,&c2,2,legacy); if (_cpu_capabilities & kCache32) c2 = 32; @@ -354,13 +372,12 @@ commpage_populate( void ) c2 = 128; commpage_stuff(_COMM_PAGE_CACHE_LINESIZE,&c2,2); - c2 = 32; - - commpage_stuff2(_COMM_PAGE_2_TO_52,&two52,8); - - commpage_stuff2(_COMM_PAGE_10_TO_6,&ten6,8); + if ( legacy ) { + commpage_stuff2(_COMM_PAGE_2_TO_52,&two52,8,legacy); + commpage_stuff2(_COMM_PAGE_10_TO_6,&ten6,8,legacy); + } - for( rd = routines; *rd != NULL ; rd++ ) + for( rd = commpage_routines; *rd != NULL ; rd++ ) commpage_stuff_routine(*rd); if (!matched) @@ -369,35 +386,54 @@ commpage_populate( void ) if (next > (uintptr_t)_COMM_PAGE_END) panic("commpage overflow: next = 0x%08x, commPagePtr = 0x%08x", next, (uintptr_t)commPagePtr); + if ( legacy ) { + next = (uintptr_t) NULL; + for( rd = ba_descriptors; *rd != NULL ; rd++ ) + commpage_stuff_routine(*rd); + + next = (uintptr_t) NULL; + commpage_stuff_routine(&sigdata_descriptor); + } - pmap_commpage_init((vm_offset_t) commPagePtr, _COMM_PAGE_BASE_ADDRESS, - _COMM_PAGE_AREA_LENGTH/INTEL_PGBYTES); + /* salt away a 
ptr to the system integrity data in this commpage */ + dsmos_blobs[dsmos_blob_count++] = + commpage_addr_of( _COMM_PAGE_SYSTEM_INTEGRITY ); } -/* - * This macro prevents compiler instruction scheduling: - */ -#define NO_REORDERING asm volatile("" : : : "memory") + +/* Fill in commpages: called once, during kernel initialization, from the + * startup thread before user-mode code is running. + * + * See the top of this file for a list of what you have to do to add + * a new routine to the commpage. + */ void -commpage_set_nanotime(commpage_nanotime_t *newp) +commpage_populate( void ) { - commpage_nanotime_t *cnp; - - /* Nop if commpage not set up yet */ - if (commPagePtr == NULL) - return; - - cnp = (commpage_nanotime_t *)commpage_addr_of(_COMM_PAGE_NANOTIME_INFO); + commpage_init_cpu_capabilities(); + + commpage_populate_one( com_region_map32, + &commPagePtr32, + _COMM_PAGE32_AREA_USED, + _COMM_PAGE32_BASE_ADDRESS, + commpage_32_routines, + TRUE, /* legacy (32-bit) commpage */ + "commpage 32-bit"); + pmap_commpage32_init((vm_offset_t) commPagePtr32, _COMM_PAGE32_BASE_ADDRESS, + _COMM_PAGE32_AREA_USED/INTEL_PGBYTES); + + if (_cpu_capabilities & k64Bit) { + commpage_populate_one( com_region_map64, + &commPagePtr64, + _COMM_PAGE64_AREA_USED, + _COMM_PAGE32_START_ADDRESS, /* because kernel is built 32-bit */ + commpage_64_routines, + FALSE, /* not a legacy commpage */ + "commpage 64-bit"); + pmap_commpage64_init((vm_offset_t) commPagePtr64, _COMM_PAGE64_BASE_ADDRESS, + _COMM_PAGE64_AREA_USED/INTEL_PGBYTES); + } - /* - * Update in reverse order: - * check_tsc first - it's read and compared with base_tsc last. 
- */ - cnp->nt_check_tsc = newp->nt_base_tsc; NO_REORDERING; - cnp->nt_shift = newp->nt_shift; NO_REORDERING; - cnp->nt_scale = newp->nt_scale; NO_REORDERING; - cnp->nt_base_ns = newp->nt_base_ns; NO_REORDERING; - cnp->nt_base_tsc = newp->nt_base_tsc; + rtc_nanotime_init_commpage(); } - diff --git a/osfmk/i386/commpage/commpage.h b/osfmk/i386/commpage/commpage.h index 2a14d32eb..2a5aaae11 100644 --- a/osfmk/i386/commpage/commpage.h +++ b/osfmk/i386/commpage/commpage.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2003 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2003-2006 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -27,8 +27,15 @@ #include #endif /* __ASSEMBLER__ */ +/* The following macro is used to generate the 64-bit commpage address for a given + * routine, based on its 32-bit address. This is used in the kernel to compile + * the 64-bit commpage. Since the kernel is a 32-bit object, cpu_capabilities.h + * only defines the 32-bit address. + */ +#define _COMM_PAGE_32_TO_64( ADDRESS ) ( ADDRESS + _COMM_PAGE64_START_ADDRESS - _COMM_PAGE32_START_ADDRESS ) + + #ifdef __ASSEMBLER__ -#include #define COMMPAGE_DESCRIPTOR(label,address,must,cant) \ L ## label ## _end: ;\ @@ -59,18 +66,15 @@ typedef struct commpage_descriptor { } commpage_descriptor; -extern char *commPagePtr; // virt address of commpage in kernel map +extern char *commPagePtr32; // virt address of 32-bit commpage in kernel map +extern char *commPagePtr64; // ...and of 64-bit commpage + +extern void _commpage_set_timestamp(uint64_t abstime, uint64_t secs); +#define commpage_set_timestamp(x, y, z) _commpage_set_timestamp((x), (y)) -extern void commpage_set_timestamp(uint64_t tbr,uint32_t secs,uint32_t usecs,uint32_t ticks_per_sec); +extern void commpage_set_nanotime(uint64_t tsc_base, uint64_t ns_base, uint32_t scale, uint32_t shift); -typedef struct { - uint64_t nt_base_tsc; - uint64_t nt_base_ns; - uint32_t nt_scale; - uint32_t nt_shift; - uint64_t nt_check_tsc; 
-} commpage_nanotime_t; -extern void commpage_set_nanotime(commpage_nanotime_t *new_nanotime); +#include #endif /* __ASSEMBLER__ */ diff --git a/osfmk/i386/commpage/commpage_asm.s b/osfmk/i386/commpage/commpage_asm.s new file mode 100644 index 000000000..f4ff05ed6 --- /dev/null +++ b/osfmk/i386/commpage/commpage_asm.s @@ -0,0 +1,190 @@ +/* + * Copyright (c) 2003-2006 Apple Computer, Inc. All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * The contents of this file constitute Original Code as defined in and + * are subject to the Apple Public Source License Version 1.1 (the + * "License"). You may not use this file except in compliance with the + * License. Please obtain a copy of the License at + * http://www.apple.com/publicsource and read it before using this file. + * + * This Original Code and all software distributed under the License are + * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the + * License for the specific language governing rights and limitations + * under the License. 
+ * + * @APPLE_LICENSE_HEADER_END@ + */ + +#include + + .text + .align 2, 0x90 + .globl __commpage_set_timestamp +/* extern void _commpage_set_timestamp(uint64_t abstime, uint64_t secs); */ +__commpage_set_timestamp: + push %ebp + mov %esp,%ebp + + mov _commPagePtr32,%ecx + sub $ _COMM_PAGE32_BASE_ADDRESS,%ecx + mov _commPagePtr64,%edx /* point to 64-bit commpage too */ + mov %edx,%eax + sub $ _COMM_PAGE32_START_ADDRESS,%edx /* because kernel is built 32-bit */ + test %eax,%eax + cmovz %ecx,%edx /* if no 64-bit commpage, point to 32 with both */ + + movl $0,_COMM_PAGE_TIMEENABLE(%ecx) + movl $0,_COMM_PAGE_TIMEENABLE(%edx) + + mov 8(%ebp),%eax + or 12(%ebp),%eax + je 1f + + mov 8(%ebp),%eax + mov %eax,_COMM_PAGE_TIMEBASE(%ecx) + mov %eax,_COMM_PAGE_TIMEBASE(%edx) + mov 12(%ebp),%eax + mov %eax,_COMM_PAGE_TIMEBASE+4(%ecx) + mov %eax,_COMM_PAGE_TIMEBASE+4(%edx) + + mov 16(%ebp),%eax + mov %eax,_COMM_PAGE_TIMESTAMP(%ecx) + mov %eax,_COMM_PAGE_TIMESTAMP(%edx) + mov 20(%ebp),%eax + mov %eax,_COMM_PAGE_TIMESTAMP+4(%ecx) + mov %eax,_COMM_PAGE_TIMESTAMP+4(%edx) + + movl $1,_COMM_PAGE_TIMEENABLE(%ecx) + movl $1,_COMM_PAGE_TIMEENABLE(%edx) +1: + pop %ebp + ret + + .text + .align 2, 0x90 + .globl _commpage_set_nanotime +/* extern void commpage_set_nanotime(uint64_t tsc_base, uint64_t ns_base, uint32_t scale, uint32_t shift); */ +_commpage_set_nanotime: + push %ebp + mov %esp,%ebp + + mov _commPagePtr32,%ecx + testl %ecx,%ecx + je 1f + + sub $(_COMM_PAGE_BASE_ADDRESS),%ecx + mov _commPagePtr64,%edx /* point to 64-bit commpage too */ + mov %edx,%eax + sub $ _COMM_PAGE32_START_ADDRESS,%edx /* because kernel is built 32-bit */ + test %eax,%eax + cmovz %ecx,%edx /* if no 64-bit commpage, point to 32 with both */ + + mov 8(%ebp),%eax + mov %eax,_COMM_PAGE_NT_TSC_BASE(%ecx) + mov %eax,_COMM_PAGE_NT_TSC_BASE(%edx) + mov 12(%ebp),%eax + mov %eax,_COMM_PAGE_NT_TSC_BASE+4(%ecx) + mov %eax,_COMM_PAGE_NT_TSC_BASE+4(%edx) + + mov 24(%ebp),%eax + mov %eax,_COMM_PAGE_NT_SCALE(%ecx) + mov 
%eax,_COMM_PAGE_NT_SCALE(%edx) + + mov 28(%ebp),%eax + mov %eax,_COMM_PAGE_NT_SHIFT(%ecx) + mov %eax,_COMM_PAGE_NT_SHIFT(%edx) + + mov 16(%ebp),%eax + mov %eax,_COMM_PAGE_NT_NS_BASE(%ecx) + mov %eax,_COMM_PAGE_NT_NS_BASE(%edx) + mov 20(%ebp),%eax + mov %eax,_COMM_PAGE_NT_NS_BASE+4(%ecx) + mov %eax,_COMM_PAGE_NT_NS_BASE+4(%edx) +1: + pop %ebp + ret + +#define CPN(routine) _commpage_ ## routine + +/* pointers to the 32-bit commpage routine descriptors */ +/* WARNING: these must be sorted by commpage address! */ + .const_data + .align 2 + .globl _commpage_32_routines +_commpage_32_routines: + .long CPN(compare_and_swap32_mp) + .long CPN(compare_and_swap32_up) + .long CPN(compare_and_swap64_mp) + .long CPN(compare_and_swap64_up) + .long CPN(atomic_add32_mp) + .long CPN(atomic_add32_up) + .long CPN(mach_absolute_time) + .long CPN(spin_lock_try_mp) + .long CPN(spin_lock_try_up) + .long CPN(spin_lock_mp) + .long CPN(spin_lock_up) + .long CPN(spin_unlock) + .long CPN(pthread_getspecific) + .long CPN(gettimeofday) + .long CPN(sys_flush_dcache) + .long CPN(sys_icache_invalidate) + .long CPN(pthread_self) +// .long CPN(relinquish) + .long CPN(bit_test_and_set_mp) + .long CPN(bit_test_and_set_up) + .long CPN(bit_test_and_clear_mp) + .long CPN(bit_test_and_clear_up) + .long CPN(bzero_scalar) + .long CPN(bzero_sse3) + .long CPN(bcopy_scalar) + .long CPN(bcopy_sse3) + .long CPN(bcopy_sse4) + .long CPN(old_nanotime) + .long CPN(memset_pattern_sse3) + .long CPN(longcopy_sse4) + .long CPN(nanotime) + .long 0 + + +/* pointers to the 64-bit commpage routine descriptors */ +/* WARNING: these must be sorted by commpage address! 
*/ + .const_data + .align 2 + .globl _commpage_64_routines +_commpage_64_routines: + .long CPN(compare_and_swap32_mp_64) + .long CPN(compare_and_swap32_up_64) + .long CPN(compare_and_swap64_mp_64) + .long CPN(compare_and_swap64_up_64) + .long CPN(atomic_add32_mp_64) + .long CPN(atomic_add32_up_64) + .long CPN(atomic_add64_mp_64) + .long CPN(atomic_add64_up_64) + .long CPN(mach_absolute_time) + .long CPN(spin_lock_try_mp_64) + .long CPN(spin_lock_try_up_64) + .long CPN(spin_lock_mp_64) + .long CPN(spin_lock_up_64) + .long CPN(spin_unlock_64) + .long CPN(pthread_getspecific_64) + .long CPN(gettimeofday_64) + .long CPN(sys_flush_dcache_64) + .long CPN(sys_icache_invalidate) /* same routine as 32-bit version, just a "ret" */ + .long CPN(pthread_self_64) + .long CPN(bit_test_and_set_mp_64) + .long CPN(bit_test_and_set_up_64) + .long CPN(bit_test_and_clear_mp_64) + .long CPN(bit_test_and_clear_up_64) + .long CPN(bzero_sse3_64) + .long CPN(bcopy_sse4_64) + .long CPN(old_nanotime_64) + .long CPN(memset_pattern_sse3_64) + .long CPN(longcopy_sse4_64) + .long CPN(nanotime_64) + .long 0 + diff --git a/osfmk/i386/commpage/commpage_gettimeofday.s b/osfmk/i386/commpage/commpage_gettimeofday.s index 6e84e601c..55bfd27ba 100644 --- a/osfmk/i386/commpage/commpage_gettimeofday.s +++ b/osfmk/i386/commpage/commpage_gettimeofday.s @@ -1,5 +1,5 @@ /* - * Copyright (c) 2003 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2003-2006 Apple Computer, Inc. All rights reserved. 
* * @APPLE_LICENSE_HEADER_START@ * @@ -24,11 +24,106 @@ #include #include +#define NSEC_PER_SEC 1000*1000*1000 +#define NSEC_PER_USEC 1000 + .text .align 2, 0x90 Lgettimeofday: - int $0x3 + push %ebp + mov %esp,%ebp + push %esi + push %edi + push %ebx + +0: + cmp $0,_COMM_PAGE_TIMEENABLE + je 4f + mov _COMM_PAGE_TIMEBASE,%esi + mov _COMM_PAGE_TIMEBASE+4,%edi + mov _COMM_PAGE_TIMESTAMP,%ebx + + mov $ _COMM_PAGE_NANOTIME,%eax + call *%eax /* get ns in %edx:%eax */ + + cmp _COMM_PAGE_TIMEBASE,%esi + jne 0b + cmp _COMM_PAGE_TIMEBASE+4,%edi + jne 0b + cmp $0,_COMM_PAGE_TIMEENABLE + je 4f + + mov $ NSEC_PER_SEC,%ecx + sub %esi,%eax + sbb %edi,%edx + div %ecx + add %eax,%ebx + + mov $ NSEC_PER_USEC,%ecx + mov %edx,%eax + xor %edx,%edx + div %ecx + + mov 8(%ebp),%ecx + mov %ebx,(%ecx) + mov %eax,4(%ecx) + xor %eax,%eax + +3: + pop %ebx + pop %edi + pop %esi + pop %ebp ret +4: /* fail */ + movl $1,%eax + jmp 3b COMMPAGE_DESCRIPTOR(gettimeofday,_COMM_PAGE_GETTIMEOFDAY,0,0) + + + .code64 + .text + .align 2, 0x90 + +Lgettimeofday_64: // %rdi = ptr to timeval + pushq %rbp // set up a frame for backtraces + movq %rsp,%rbp + movq %rdi,%r9 // save ptr to timeval + movq $_COMM_PAGE_32_TO_64(_COMM_PAGE_TIMEBASE),%r10 +0: + cmpl $0,_TIMEENABLE(%r10) // is data valid? (test _COMM_PAGE_TIMEENABLE) + jz 4f // no + movq _TIMEBASE(%r10),%r11 // get _COMM_PAGE_TIMEBASE + movq $_COMM_PAGE_32_TO_64(_COMM_PAGE_NANOTIME),%rax + call *%rax // get %rax <- nanotime(), preserving %r9, %r10 and %r11 + movl _TIMESTAMP(%r10),%r8d // get _COMM_PAGE_TIMESTAMP + cmpq _TIMEBASE(%r10),%r11 // has _COMM_PAGE_TIMEBASE changed? + jne 0b // loop until we have consistent data + cmpl $0,_TIMEENABLE(%r10) // is data valid? 
(test _COMM_PAGE_TIMEENABLE) + jz 4f // no + + movl $ NSEC_PER_SEC,%ecx + subq %r11,%rax // generate nanoseconds since timestamp + movq %rax,%rdx + shrq $32,%rdx // get high half of delta in %edx + divl %ecx // %eax <- seconds since timestamp, %edx <- nanoseconds + addl %eax,%r8d // add seconds elapsed to timestamp seconds + + movl $ NSEC_PER_USEC,%ecx + movl %edx,%eax + xorl %edx,%edx + divl %ecx // divide residual ns by 1000 to get residual us in %eax + + movq %r8,(%r9) // store 64-bit seconds into timeval + movl %eax,8(%r9) // store 32-bit useconds into timeval + xorl %eax,%eax // return 0 for success +3: + popq %rbp + ret +4: // fail + movl $1,%eax + jmp 3b + + COMMPAGE_DESCRIPTOR(gettimeofday_64,_COMM_PAGE_GETTIMEOFDAY,0,0) diff --git a/osfmk/i386/commpage/commpage_mach_absolute_time.s b/osfmk/i386/commpage/commpage_mach_absolute_time.s index 3427958ed..71592ec0e 100644 --- a/osfmk/i386/commpage/commpage_mach_absolute_time.s +++ b/osfmk/i386/commpage/commpage_mach_absolute_time.s @@ -1,5 +1,5 @@ /* - * Copyright (c) 2003 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2003-2006 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -31,89 +31,87 @@ .align 2, 0x90 Lmach_absolute_time: - int $0x3 ret - COMMPAGE_DESCRIPTOR(mach_absolute_time,_COMM_PAGE_ABSOLUTE_TIME,1,0) - + COMMPAGE_DESCRIPTOR(mach_absolute_time,_COMM_PAGE_ABSOLUTE_TIME,0,0) +/* Nanotime is being moved out of the way of bcopy in the commpage. + * First we put it in both places, old and new. Then, when all the build + * trains have rebuilt libSystem, we can remove the deprecated instance. 
+ */ + +/* return nanotime in %edx:%eax */ +Lold_nanotime: Lnanotime: + push %ebp + mov %esp,%ebp + push %esi + push %edi + push %ebx + +0: + mov _COMM_PAGE_NT_TSC_BASE,%esi + mov _COMM_PAGE_NT_TSC_BASE+4,%edi + + rdtsc + sub %esi,%eax + sbb %edi,%edx + + mov _COMM_PAGE_NT_SCALE,%ecx + + mov %edx,%ebx + mull %ecx + mov %ebx,%eax + mov %edx,%ebx + mull %ecx + add %ebx,%eax + adc $0,%edx + + add _COMM_PAGE_NT_NS_BASE,%eax + adc _COMM_PAGE_NT_NS_BASE+4,%edx + + cmp _COMM_PAGE_NT_TSC_BASE,%esi + jne 0b + cmp _COMM_PAGE_NT_TSC_BASE+4,%edi + jne 0b + + pop %ebx + pop %edi + pop %esi + pop %ebp + ret - pushl %ebx - pushl %esi - pushl %edi - pushl %ebp - movl $(_COMM_PAGE_NANOTIME_INFO), %esi - - /* - * The nanotime info consists of: - * - base_tsc 64-bit timestamp register value - * - base_ns 64-bit corresponding nanosecond uptime value - * - scale 32-bit current scale multiplier - * - shift 32-bit current shift divider - * - check_tsc 64-bit timestamp check value - * - * This enables an timestamp register's value, tsc, to be converted - * into a nanosecond nanotime value, ns: - * - * ns = base_ns + ((tsc - base_tsc) * scale >> shift) - * - * The kernel updates this every tick or whenever a performance - * speed-step changes the scaling. To avoid locking, a duplicated - * sequence counting scheme is used. The base_tsc value is updated - * whenever the info starts to be changed, and check_tsc is updated - * to the same value at the end of the update. The regularity of - * update ensures that (tsc - base_tsc) is a 32-bit quantity. - * When a conversion is performed, we read base_tsc before we start - * and check_tsc at the end -- if there's a mis-match we repeat. - * It's sufficient to compare only the low-order 32-bits. - */ + COMMPAGE_DESCRIPTOR(nanotime,_COMM_PAGE_NANOTIME,0,0) + COMMPAGE_DESCRIPTOR(old_nanotime,_COMM_PAGE_OLD_NANOTIME,0,0) + +/* The 64-bit version. We return the 64-bit nanotime in %rax, + * and by convention we must preserve %r9, %r10, and %r11. 
+ */ + .text + .align 2 + .code64 +Lold_nanotime_64: +Lnanotime_64: // NB: must preserve r9, r10, and r11 + pushq %rbp // set up a frame for backtraces + movq %rsp,%rbp + movq $_COMM_PAGE_32_TO_64(_COMM_PAGE_NT_TSC_BASE),%rsi 1: - // - // Read nanotime info and stash in registers. - // - movl NANOTIME_BASE_TSC(%esi), %ebx // ebx := lo(base_tsc) - movl NANOTIME_BASE_NS(%esi), %ebp - movl NANOTIME_BASE_NS+4(%esi), %edi // edi:ebp := base_ns - movl NANOTIME_SHIFT(%esi), %ecx // ecx := shift - // - // Read timestamp register (tsc) and calculate delta. - // - rdtsc // edx:eax := tsc - subl %ebx, %eax // eax := (tsc - base_tsc) - movl NANOTIME_SCALE(%esi), %edx // edx := shift - // - // Check for consistency and re-read if necessary. - // - cmpl NANOTIME_CHECK_TSC(%esi), %ebx + movq _NT_TSC_BASE(%rsi),%r8 // r8 := base_tsc + rdtsc // edx:eax := tsc + shlq $32,%rdx // rax := ((edx << 32) | eax), ie 64-bit tsc + orq %rdx,%rax + subq %r8, %rax // rax := (tsc - base_tsc) + movl _NT_SCALE(%rsi),%ecx + mulq %rcx // rdx:rax := (tsc - base_tsc) * scale + shrdq $32,%rdx,%rax // _COMM_PAGE_NT_SHIFT is always 32 + addq _NT_NS_BASE(%rsi),%rax // (((tsc - base_tsc) * scale) >> 32) + ns_base + + cmpq _NT_TSC_BASE(%rsi),%r8 // did the data change during computation? 
jne 1b - - // - // edx:eax := ((tsc - base_tsc) * scale) - // - mull %edx - - // - // eax := ((tsc - base_tsc) * scale >> shift) - // - shrdl %cl, %edx, %eax - andb $32, %cl - cmovnel %edx, %eax // %eax := %edx if shift == 32 - xorl %edx, %edx - - // - // Add base_ns: - // edx:eax = (base_ns + ((tsc - base_tsc) * scale >> shift)) - // - addl %ebp, %eax - adcl %edi, %edx - - popl %ebp - popl %edi - popl %esi - popl %ebx + popq %rbp ret - COMMPAGE_DESCRIPTOR(nanotime,_COMM_PAGE_NANOTIME,1,0) + COMMPAGE_DESCRIPTOR(nanotime_64,_COMM_PAGE_NANOTIME,0,0) + COMMPAGE_DESCRIPTOR(old_nanotime_64,_COMM_PAGE_OLD_NANOTIME,0,0) diff --git a/osfmk/i386/commpage/commpage_sigs.c b/osfmk/i386/commpage/commpage_sigs.c new file mode 100644 index 000000000..8b7e7d404 --- /dev/null +++ b/osfmk/i386/commpage/commpage_sigs.c @@ -0,0 +1,189 @@ +#include "commpage.h" + +static unsigned int sigdata[] = +{ + 0x06004018, 0x01000000, 0x6d6f635f, 0x65726170, 0x646e615f, 0x6177735f, + 0x00323370, 0x06004018, 0x2828e07c, 0x4018077c, 0x14008240, 0x2d29807c, + 0xf0ffa240, 0x01006038, 0x2000804e, 0x00006038, 0x2000804e, 0x06004018, + 0x00000000, 0x6d6f635f, 0x65726170, 0x646e615f, 0x6177735f, 0x00323370, + 0x06004018, 0x06004018, 0x01000000, 0x6d6f635f, 0x65726170, 0x646e615f, + 0x6177735f, 0x62323370, 0x00000000, 0x06004018, 0xac06007c, 0x2828e07c, + 0x4018077c, 0x18008240, 0x2d29807c, 0xf0ffa240, 0x2c01004c, 0x01006038, + 0x2000804e, 0x00006038, 0x2000804e, 0x2000804e, 0x06004018, 0x00000000, + 0x6d6f635f, 0x65726170, 0x646e615f, 0x6177735f, 0x62323370, 0x00000000, + 0x06004018, 0x06004018, 0x01000000, 0x6d6f635f, 0x65726170, 0x646e615f, + 0x6177735f, 0x62343670, 0x00000000, 0x06004018, 0xac04207c, 0xa828e07c, + 0x4018277c, 0x1800c240, 0xad29807c, 0xf0ffc240, 0x2c01004c, 0x01006038, + 0x2000804e, 0xf8ff0039, 0x00006038, 0xad09887c, 0x2000804e, 0x2000804e, + 0x06004018, 0x00000000, 0x6d6f635f, 0x65726170, 0x646e615f, 0x6177735f, + 0x62343670, 0x00000000, 0x06004018, 0x06004018, 0x01000000, 
0x6d656d5f, + 0x5f746573, 0x74746170, 0x006e7265, 0x06004018, 0x10000071, 0x000009c8, + 0x080029c8, 0x14008241, 0x000008d8, 0x080028d8, 0xf0ff8438, 0x10000839, + 0xbfd18054, 0x0000807f, 0xbe068454, 0x70008241, 0xec47007c, 0xa603097c, + 0x2000c038, 0x3400be41, 0x000008d8, 0x080028d8, 0x100008d8, 0x180028d8, + 0x200008d8, 0x280028d8, 0x300008d8, 0x380028d8, 0x40000839, 0xdcff0042, + 0x34000048, 0xec47007c, 0xec47067c, 0x000008d8, 0x080028d8, 0x100008d8, + 0x180028d8, 0x200008d8, 0x280028d8, 0x300008d8, 0x380028d8, 0x40000839, + 0xd4ff0042, 0x3fe18054, 0x3e078454, 0x2000824d, 0xa603097c, 0x000008d8, + 0x080028d8, 0x10000839, 0xf4ff0042, 0x2000804e, 0x2000804e, 0x06004018, + 0x00000000, 0x6d656d5f, 0x5f746573, 0x74746170, 0x006e7265, 0x06004018, + 0x06004018, 0x01000000, 0x6f74615f, 0x5f63696d, 0x75716e65, 0x00657565, + 0x06004018, 0x2818c07c, 0x2e29c47c, 0x00000060, 0x2d19807c, 0x2000e24d, + 0xecffff4b, 0x2000804e, 0x06004018, 0x00000000, 0x6f74615f, 0x5f63696d, + 0x75716e65, 0x00657565, 0x06004018, 0x06004018, 0x01000000, 0x6f74615f, + 0x5f63696d, 0x75716564, 0x00657565, 0x06004018, 0x781b657c, 0x2828607c, + 0x0000032c, 0x2000824d, 0x2e20c37c, 0x2d29c07c, 0xecffc240, 0x00000060, + 0x2000804e, 0x06004018, 0x00000000, 0x6f74615f, 0x5f63696d, 0x75716564, + 0x00657565, 0x06004018, 0x06004018, 0x01000000, 0x6f74615f, 0x5f63696d, + 0x72726162, 0x00726569, 0x06004018, 0x2000804e, 0x06004018, 0x00000000, + 0x6f74615f, 0x5f63696d, 0x72726162, 0x00726569, 0x06004018, 0x06004018, + 0x01000000, 0x6f74615f, 0x5f63696d, 0x5f646461, 0x00003233, 0x06004018, + 0x2820a07c, 0x142ac37c, 0x2d21c07c, 0xf4ffc240, 0x7833c37c, 0x2000804e, + 0x06004018, 0x00000000, 0x6f74615f, 0x5f63696d, 0x5f646461, 0x00003233, + 0x06004018, 0x06004018, 0x01000000, 0x63616d5f, 0x62615f68, 0x756c6f73, + 0x745f6574, 0x00656d69, 0x06004018, 0x00004018, 0x04006000, 0x00000000, + 0x00000000, 0x63616d5f, 0x62615f68, 0x756c6f73, 0x745f6574, 0x5f656d69, + 0x68676968, 0x00000000, 0x00004018, 0x00004018, 0x04008000, 
0x00000000, + 0x00000000, 0x63616d5f, 0x62615f68, 0x756c6f73, 0x745f6574, 0x5f656d69, + 0x00776f6c, 0x00004018, 0x2000804e, 0x06004018, 0x00000000, 0x63616d5f, + 0x62615f68, 0x756c6f73, 0x745f6574, 0x00656d69, 0x06004018, 0x06004018, + 0x01000000, 0x6970735f, 0x6f6c5f6e, 0x745f6b63, 0x00007972, 0x06004018, + 0x05004018, 0x04006000, 0x05004018, 0x00004018, 0x04006000, 0x00000000, + 0x00000000, 0x6970735f, 0x6f6c5f6e, 0x745f6b63, 0x775f7972, 0x70706172, + 0x00007265, 0x00004018, 0x2000804e, 0x06004018, 0x00000000, 0x6970735f, + 0x6f6c5f6e, 0x745f6b63, 0x00007972, 0x06004018, 0x06004018, 0x01000000, + 0x6970735f, 0x6f6c5f6e, 0x00006b63, 0x06004018, 0x05004018, 0x04006000, + 0x05004018, 0x00004018, 0x00000000, 0x00000000, 0x00000000, 0x6970735f, + 0x6f6c5f6e, 0x775f6b63, 0x70706172, 0x00007265, 0x00004018, 0x2000804e, + 0x06004018, 0x00000000, 0x6970735f, 0x6f6c5f6e, 0x00006b63, 0x06004018, + 0x06004018, 0x01000000, 0x6970735f, 0x6e755f6e, 0x6b636f6c, 0x00000000, + 0x06004018, 0x05004018, 0x04006000, 0x05004018, 0x00004018, 0x00000000, + 0x00000000, 0x00000000, 0x6970735f, 0x6e755f6e, 0x6b636f6c, 0x00000000, + 0x00004018, 0x2000804e, 0x06004018, 0x00000000, 0x6970735f, 0x6e755f6e, + 0x6b636f6c, 0x00000000, 0x06004018, 0x06004018, 0x01000000, 0x6874705f, + 0x64616572, 0x7465675f, 0x63657073, 0x63696669, 0x00000000, 0x06004018, + 0x02004018, 0xc082ffff, 0x02004018, 0x2000804e, 0x06004018, 0x00000000, + 0x6874705f, 0x64616572, 0x7465675f, 0x63657073, 0x63696669, 0x00000000, + 0x06004018, 0x06004018, 0x01000000, 0x7465675f, 0x656d6974, 0x6164666f, + 0x00000079, 0x06004018, 0x05004018, 0x04006000, 0x05004018, 0x00004018, + 0x04006000, 0x00000000, 0x00000000, 0x7465675f, 0x656d6974, 0x6164666f, + 0x72775f79, 0x65707061, 0x00000072, 0x00004018, 0x2000804e, 0x06004018, + 0x00000000, 0x7465675f, 0x656d6974, 0x6164666f, 0x00000079, 0x06004018, + 0x06004018, 0x01000000, 0x7379735f, 0x6163645f, 0x5f656863, 0x73756c66, + 0x00000068, 0x06004018, 0x05004018, 0x04006000, 0x05004018, 
0x05004018, + 0x04008000, 0x05004018, 0x00004018, 0x00000000, 0x00000000, 0x00000000, + 0x7379735f, 0x6163645f, 0x5f656863, 0x73756c66, 0x00000068, 0x00004018, + 0x2000804e, 0x06004018, 0x00000000, 0x7379735f, 0x6163645f, 0x5f656863, + 0x73756c66, 0x00000068, 0x06004018, 0x06004018, 0x01000000, 0x7379735f, + 0x6163695f, 0x5f656863, 0x61766e69, 0x6164696c, 0x00006574, 0x06004018, + 0x05004018, 0x04006000, 0x05004018, 0x05004018, 0x04008000, 0x05004018, + 0x00004018, 0x00000000, 0x00000000, 0x00000000, 0x7379735f, 0x6163695f, + 0x5f656863, 0x61766e69, 0x6164696c, 0x775f6574, 0x70706172, 0x00007265, + 0x00004018, 0x2000804e, 0x06004018, 0x00000000, 0x7379735f, 0x6163695f, + 0x5f656863, 0x61766e69, 0x6164696c, 0x00006574, 0x06004018, 0x06004018, + 0x01000000, 0x6874705f, 0x64616572, 0x6c65735f, 0x00000066, 0x06004018, + 0x02004018, 0x8085ffff, 0x02004018, 0x2000804e, 0x06004018, 0x00000000, + 0x6874705f, 0x64616572, 0x6c65735f, 0x00000066, 0x06004018, 0x06004018, + 0x01000000, 0x657a625f, 0x00006f72, 0x06004018, 0x05004018, 0x04006000, + 0x05004018, 0x05004018, 0x04008000, 0x05004018, 0x00004018, 0x00000000, + 0x00000000, 0x00000000, 0x657a625f, 0x00006f72, 0x00004018, 0x2000804e, + 0x06004018, 0x00000000, 0x657a625f, 0x00006f72, 0x06004018, 0x06004018, + 0x01000000, 0x6f63625f, 0x00007970, 0x06004018, 0x05004018, 0x04006000, + 0x05004018, 0x05004018, 0x04008000, 0x05004018, 0x05004018, 0x0400a000, + 0x05004018, 0x00004018, 0x00000000, 0x00000000, 0x00000000, 0x6f63625f, + 0x00007970, 0x00004018, 0x2000804e, 0x06004018, 0x00000000, 0x6f63625f, + 0x00007970, 0x06004018, 0x06004018, 0x01000000, 0x6d656d5f, 0x65766f6d, + 0x00000000, 0x06004018, 0x05004018, 0x04006000, 0x05004018, 0x05004018, + 0x04008000, 0x05004018, 0x05004018, 0x0400a000, 0x05004018, 0x00004018, + 0x00000000, 0x00000000, 0x00000000, 0x6d656d5f, 0x65766f6d, 0x00000000, + 0x00004018, 0x2000804e, 0x06004018, 0x00000000, 0x6d656d5f, 0x65766f6d, + 0x00000000, 0x06004018, 0x06004018, 0x01000000, 0x6e616e5f, 
0x6d69746f, + 0x00000065, 0x06004018, 0x00004018, 0x04006000, 0x00000000, 0x00000000, + 0x6e616e5f, 0x6d69746f, 0x69685f65, 0x00006867, 0x00004018, 0x00004018, + 0x04008000, 0x00000000, 0x00000000, 0x6e616e5f, 0x6d69746f, 0x6f6c5f65, + 0x00000077, 0x00004018, 0x2000804e, 0x06004018, 0x00000000, 0x6e616e5f, + 0x6d69746f, 0x00000065, 0x06004018, 0x06004018, 0x01000000, 0x6a626f5f, + 0x736d5f63, 0x6e655367, 0x00000064, 0x06004018, 0x00004018, 0x00000000, + 0x00000400, 0x00000000, 0x6a626f5f, 0x736d5f63, 0x6e655367, 0x00000064, + 0x00004018, 0x06004018, 0x00000000, 0x6a626f5f, 0x736d5f63, 0x6e655367, + 0x00000064, 0x06004018, 0x06004018, 0x01000000, 0x6a626f5f, 0x73615f63, + 0x6e676973, 0x6176695f, 0x00000072, 0x06004018, 0x00004018, 0x00000000, + 0x00000400, 0x00000000, 0x6a626f5f, 0x73615f63, 0x6e676973, 0x6176695f, + 0x65675f72, 0x6972656e, 0x00000063, 0x00004018, 0x06004018, 0x00000000, + 0x6a626f5f, 0x73615f63, 0x6e676973, 0x6176695f, 0x00000072, 0x06004018, + 0x06004018, 0x01000000, 0x6a626f5f, 0x73615f63, 0x6e676973, 0x6f6c675f, + 0x006c6162, 0x06004018, 0x00004018, 0x00000000, 0x00000400, 0x00000000, + 0x6a626f5f, 0x73615f63, 0x6e676973, 0x6f6c675f, 0x5f6c6162, 0x656e6567, + 0x00636972, 0x00004018, 0x06004018, 0x00000000, 0x6a626f5f, 0x73615f63, + 0x6e676973, 0x6f6c675f, 0x006c6162, 0x06004018, 0x06004018, 0x01000000, + 0x6a626f5f, 0x73615f63, 0x6e676973, 0x7274735f, 0x43676e6f, 0x00747361, + 0x06004018, 0x00004018, 0x00000000, 0x00000400, 0x00000000, 0x6a626f5f, + 0x73615f63, 0x6e676973, 0x7274735f, 0x43676e6f, 0x5f747361, 0x656e6567, + 0x00636972, 0x00004018, 0x06004018, 0x00000000, 0x6a626f5f, 0x73615f63, + 0x6e676973, 0x7274735f, 0x43676e6f, 0x00747361, 0x06004018, +}; +commpage_descriptor sigdata_descriptor = +{ sigdata, sizeof(sigdata), 0xffff3000, 0, 0 }; + +static unsigned int badata[] = +{ + 0xaa3aff4b, 0x3a3aff4b, 0xca39ff4b, 0x6e39ff4b, 0x0230ff4b, 0x5a32ff4b, + 0xae32ff4b, 0x0a33ff4b, 0x4633ff4b, 0x9633ff4b, 0x3634ff4b, 0xaa34ff4b, + 0x1235ff4b, 
0x7e35ff4b, 0xd635ff4b, 0x4a36ff4b, 0xce36ff4b, 0x6637ff4b, + 0xae37ff4b, 0x0e38ff4b, 0x7a38ff4b, 0x6630ff4b, 0xde30ff4b, 0x5e31ff4b, +}; +static commpage_descriptor badata_descriptor_ary[] = +{ + { &badata[ 0], 4, 0xfffefea0, 0, 0 }, + { &badata[ 1], 4, 0xfffefeb0, 0, 0 }, + { &badata[ 2], 4, 0xfffefec0, 0, 0 }, + { &badata[ 3], 4, 0xfffeff00, 0, 0 }, + { &badata[ 4], 4, 0xffff8080, 0, 0 }, + { &badata[ 5], 4, 0xffff8100, 0, 0 }, + { &badata[ 6], 4, 0xffff8140, 0, 0 }, + { &badata[ 7], 4, 0xffff8180, 0, 0 }, + { &badata[ 8], 4, 0xffff81a0, 0, 0 }, + { &badata[ 9], 4, 0xffff8200, 0, 0 }, + { &badata[10], 4, 0xffff8220, 0, 0 }, + { &badata[11], 4, 0xffff8260, 0, 0 }, + { &badata[12], 4, 0xffff82a0, 0, 0 }, + { &badata[13], 4, 0xffff82c0, 0, 0 }, + { &badata[14], 4, 0xffff82e0, 0, 0 }, + { &badata[15], 4, 0xffff84e0, 0, 0 }, + { &badata[16], 4, 0xffff8520, 0, 0 }, + { &badata[17], 4, 0xffff8580, 0, 0 }, + { &badata[18], 4, 0xffff8600, 0, 0 }, + { &badata[19], 4, 0xffff8780, 0, 0 }, + { &badata[20], 4, 0xffff87a0, 0, 0 }, + { &badata[21], 4, 0xffff8f80, 0, 0 }, + { &badata[22], 4, 0xffff8fc0, 0, 0 }, + { &badata[23], 4, 0xffff9000, 0, 0 }, +}; +commpage_descriptor *ba_descriptors[] = +{ + &badata_descriptor_ary[ 0], + &badata_descriptor_ary[ 1], + &badata_descriptor_ary[ 2], + &badata_descriptor_ary[ 3], + &badata_descriptor_ary[ 4], + &badata_descriptor_ary[ 5], + &badata_descriptor_ary[ 6], + &badata_descriptor_ary[ 7], + &badata_descriptor_ary[ 8], + &badata_descriptor_ary[ 9], + &badata_descriptor_ary[10], + &badata_descriptor_ary[11], + &badata_descriptor_ary[12], + &badata_descriptor_ary[13], + &badata_descriptor_ary[14], + &badata_descriptor_ary[15], + &badata_descriptor_ary[16], + &badata_descriptor_ary[17], + &badata_descriptor_ary[18], + &badata_descriptor_ary[19], + &badata_descriptor_ary[20], + &badata_descriptor_ary[21], + &badata_descriptor_ary[22], + &badata_descriptor_ary[23], + 0 +}; diff --git a/osfmk/i386/commpage/commpage_sigs.h 
b/osfmk/i386/commpage/commpage_sigs.h deleted file mode 100644 index e69de29bb..000000000 diff --git a/osfmk/i386/commpage/commpage_sigs.s b/osfmk/i386/commpage/commpage_sigs.s deleted file mode 100644 index e69de29bb..000000000 diff --git a/osfmk/i386/commpage/longcopy_sse4.s b/osfmk/i386/commpage/longcopy_sse4.s new file mode 100644 index 000000000..86a2d2e39 --- /dev/null +++ b/osfmk/i386/commpage/longcopy_sse4.s @@ -0,0 +1,215 @@ +/* + * Copyright (c) 2006 Apple Computer, Inc. All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * The contents of this file constitute Original Code as defined in and + * are subject to the Apple Public Source License Version 1.1 (the + * "License"). You may not use this file except in compliance with the + * License. Please obtain a copy of the License at + * http://www.apple.com/publicsource and read it before using this file. + * + * This Original Code and all software distributed under the License are + * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the + * License for the specific language governing rights and limitations + * under the License. + * + * @APPLE_LICENSE_HEADER_END@ + */ + +#include +#include + + +/* + * The bcopy/memcpy loops for very long operands, tuned for Pentium-M + * class processors with SSE4 and 64-byte cache lines. + * + * The following #defines are tightly coupled to the u-architecture: + */ + +#define kBigChunk (256*1024) // outer loop chunk size for kVeryLong sized operands + + +// Very long forward moves. These are at least several pages, so we loop over big +// chunks of memory (kBigChunk in size.) We first prefetch the chunk, and then copy +// it using non-temporal stores. 
Hopefully all the reads occur in the prefetch loop, +// so the copy loop reads from L2 and writes directly to memory (with write combining.) +// This minimizes bus turnaround and maintains good DRAM page locality. +// Note that for this scheme to work, kVeryLong must be a large fraction of L2 cache +// size. Otherwise, it is counter-productive to bypass L2 on the stores. +// +// We are called from the commpage bcopy loops when they encounter very long +// operands, with the standard ABI. +// +// void longcopy(void *dest, const void *sou, size_t len) + + .text + .align 5, 0x90 +Llongcopy_sse4: // void longcopy(void *dest, const void *sou, size_t len) + pushl %ebp // set up a frame for backtraces + movl %esp,%ebp + pushl %esi + pushl %edi + pushl %ebx // we'll need to use this too + movl 8(%ebp),%edi // get dest ptr + movl 12(%ebp),%esi // get source ptr + movl 16(%ebp),%ecx // get length + movl %edi,%ebx // copy dest ptr + negl %ebx + andl $63,%ebx // get #bytes to cache line align destination + jz LBigChunkLoop // already aligned + +// Cache line align destination, so non-temporal stores in copy loops work right. + + pushl %ebx // arg3 - #bytes to align destination (1..63) + pushl %esi // arg2 - source + pushl %edi // arg1 - dest + movl $(_COMM_PAGE_MEMCPY),%eax + call *%eax // align the destination + addl $12,%esp + movl 8(%ebp),%edi // recover dest ptr + movl 12(%ebp),%esi // recover source ptr + movl 16(%ebp),%ecx // recover length + addl %ebx,%esi // adjust ptrs and lengths past copy + addl %ebx,%edi + subl %ebx,%ecx + +// Loop over big chunks. +// ecx = length remaining (>= 4096) +// edi = dest (64-byte aligned) +// esi = source (may be unaligned) + +LBigChunkLoop: + movl $(kBigChunk),%edx // assume we can do a full chunk + cmpl %edx,%ecx // do we have a full chunk left to do?
+ cmovbl %ecx,%edx // if not, only move what we have left + andl $-4096,%edx // we work in page multiples + xor %eax,%eax // initialize chunk offset + jmp LTouchLoop + +// Touch in the next chunk. We try to keep the prefetch unit in "kick-start" mode, +// by touching two adjacent cache lines every 8 lines of each page, in four slices. +// Because the source may be unaligned, we use byte loads to touch. +// ecx = length remaining (including this chunk) +// edi = ptr to start of dest chunk +// esi = ptr to start of source chunk +// edx = chunk length (multiples of pages) +// ebx = scratch reg used to read a byte of each cache line +// eax = chunk offset + + .align 4,0x90 // 16-byte align inner loops +LTouchLoop: + movzb (%esi,%eax),%ebx // touch line 0, 2, 4, or 6 of page + movzb 1*64(%esi,%eax),%ebx // touch line 1, 3, 5, or 7 + movzb 8*64(%esi,%eax),%ebx // touch line 8, 10, 12, or 14 + movzb 9*64(%esi,%eax),%ebx // etc + + movzb 16*64(%esi,%eax),%ebx + movzb 17*64(%esi,%eax),%ebx + movzb 24*64(%esi,%eax),%ebx + movzb 25*64(%esi,%eax),%ebx + + movzb 32*64(%esi,%eax),%ebx + movzb 33*64(%esi,%eax),%ebx + movzb 40*64(%esi,%eax),%ebx + movzb 41*64(%esi,%eax),%ebx + + movzb 48*64(%esi,%eax),%ebx + movzb 49*64(%esi,%eax),%ebx + movzb 56*64(%esi,%eax),%ebx + movzb 57*64(%esi,%eax),%ebx + + subl $-128,%eax // next slice of page (adding 128 w 8-bit immediate) + testl $512,%eax // done with this page? + jz LTouchLoop // no, next of four slices + addl $(4096-512),%eax // move on to next page + cmpl %eax,%edx // done with this chunk? + jnz LTouchLoop // no, do next page + +// The chunk has been pre-fetched, now copy it using non-temporal stores. +// There are two copy loops, depending on whether the source is 16-byte aligned +// or not. + + addl %edx,%esi // increment ptrs by chunk length + addl %edx,%edi + subl %edx,%ecx // adjust remaining length + negl %edx // prepare loop index (counts up to 0) + testl $15,%esi // is source 16-byte aligned? 
+ jnz LVeryLongUnaligned // source is not aligned + jmp LVeryLongAligned + + .align 4,0x90 // 16-byte align inner loops +LVeryLongAligned: // aligned loop over 128-bytes + movdqa (%esi,%edx),%xmm0 + movdqa 16(%esi,%edx),%xmm1 + movdqa 32(%esi,%edx),%xmm2 + movdqa 48(%esi,%edx),%xmm3 + movdqa 64(%esi,%edx),%xmm4 + movdqa 80(%esi,%edx),%xmm5 + movdqa 96(%esi,%edx),%xmm6 + movdqa 112(%esi,%edx),%xmm7 + + movntdq %xmm0,(%edi,%edx) + movntdq %xmm1,16(%edi,%edx) + movntdq %xmm2,32(%edi,%edx) + movntdq %xmm3,48(%edi,%edx) + movntdq %xmm4,64(%edi,%edx) + movntdq %xmm5,80(%edi,%edx) + movntdq %xmm6,96(%edi,%edx) + movntdq %xmm7,112(%edi,%edx) + + subl $-128,%edx // add 128 with an 8-bit immediate + jnz LVeryLongAligned + jmp LVeryLongChunkEnd + + .align 4,0x90 // 16-byte align inner loops +LVeryLongUnaligned: // unaligned loop over 128-bytes + movdqu (%esi,%edx),%xmm0 + movdqu 16(%esi,%edx),%xmm1 + movdqu 32(%esi,%edx),%xmm2 + movdqu 48(%esi,%edx),%xmm3 + movdqu 64(%esi,%edx),%xmm4 + movdqu 80(%esi,%edx),%xmm5 + movdqu 96(%esi,%edx),%xmm6 + movdqu 112(%esi,%edx),%xmm7 + + movntdq %xmm0,(%edi,%edx) + movntdq %xmm1,16(%edi,%edx) + movntdq %xmm2,32(%edi,%edx) + movntdq %xmm3,48(%edi,%edx) + movntdq %xmm4,64(%edi,%edx) + movntdq %xmm5,80(%edi,%edx) + movntdq %xmm6,96(%edi,%edx) + movntdq %xmm7,112(%edi,%edx) + + subl $-128,%edx // add 128 with an 8-bit immediate + jnz LVeryLongUnaligned + +LVeryLongChunkEnd: + cmpl $4096,%ecx // at least another page to go? + jae LBigChunkLoop // yes + +// Done. Call memcpy() again to handle the 0-4095 bytes at the end. + + sfence // required by non-temporal stores + testl %ecx,%ecx // anything left to copy? 
+ jz 1f + pushl %ecx // arg3 - #bytes remaining to copy (1..4095) + pushl %esi // arg2 - source + pushl %edi // arg1 - dest + movl $(_COMM_PAGE_MEMCPY),%eax + call *%eax // copy the remaining bytes + addl $12,%esp // pop off arguments +1: + popl %ebx + popl %edi + popl %esi + popl %ebp + ret + + /* always match for now, as commpage_stuff_routine() will panic if no match */ + COMMPAGE_DESCRIPTOR(longcopy_sse4, _COMM_PAGE_LONGCOPY, 0 ,0) diff --git a/osfmk/i386/commpage/longcopy_sse4_64.s b/osfmk/i386/commpage/longcopy_sse4_64.s new file mode 100644 index 000000000..a55c6551e --- /dev/null +++ b/osfmk/i386/commpage/longcopy_sse4_64.s @@ -0,0 +1,205 @@ +/* + * Copyright (c) 2006 Apple Computer, Inc. All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * The contents of this file constitute Original Code as defined in and + * are subject to the Apple Public Source License Version 1.1 (the + * "License"). You may not use this file except in compliance with the + * License. Please obtain a copy of the License at + * http://www.apple.com/publicsource and read it before using this file. + * + * This Original Code and all software distributed under the License are + * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the + * License for the specific language governing rights and limitations + * under the License. + * + * @APPLE_LICENSE_HEADER_END@ + */ + +#include +#include + + +/* + * The bcopy/memcpy loops for very long operands, tuned for 64-bit + * Pentium-M class processors with SSE4 and 64-byte cache lines. + * This is the 64-bit version. + * + * The following #defines are tightly coupled to the u-architecture: + */ + +#define kBigChunk (256*1024) // outer loop chunk size for kVeryLong sized operands + + +// Very long forward moves.
These are at least several pages, so we loop over big +// chunks of memory (kBigChunk in size.) We first prefetch the chunk, and then copy +// it using non-temporal stores. Hopefully all the reads occur in the prefetch loop, +// so the copy loop reads from L2 and writes directly to memory (with write combining.) +// This minimizes bus turnaround and maintains good DRAM page locality. +// Note that for this scheme to work, kVeryLong must be a large fraction of L2 cache +// size. Otherwise, it is counter-productive to bypass L2 on the stores. +// +// We are called from the commpage bcopy loops when they encounter very long +// operands, with the standard ABI: +// rdi = dest ptr +// rsi = source ptr +// rdx = length (>= 8kb, probably much bigger) + + .text + .code64 + .align 5, 0x90 +Llongcopy_sse4_64: // void longcopy(void *dest, const void *source, size_t len) + pushq %rbp // set up a frame for backtraces + movq %rsp,%rbp + movl %edi,%eax // copy dest ptr + negl %eax + andl $63,%eax // get #bytes to cache line align destination + jz LBigChunkLoop // already aligned + +// Cache line align destination, so non-temporal stores in copy loops work right. +// The recursive call returns with the source and dest ptrs properly updated. + + subq %rax,%rdx // get length remaining after dest is aligned + pushq %rdx // save length remaining + movl %eax,%edx // #bytes to copy to align destination + movq $_COMM_PAGE_32_TO_64(_COMM_PAGE_MEMCPY),%rax + call *%rax + popq %rdx // recover adjusted length + +// Loop over big chunks. +// rdx = length remaining (>= 4096) +// rdi = dest (64-byte aligned) +// rsi = source (may be unaligned) + +LBigChunkLoop: + movl $(kBigChunk),%r8d // assume we can do a full chunk + cmpq %r8,%rdx // do we have a full chunk left to do? + cmovbl %edx,%r8d // if not, only move what we have left + andl $-4096,%r8d // we work in page multiples + xorl %eax,%eax // initialize chunk offset + jmp LTouchLoop + +// Touch in the next chunk.
We try to keep the prefetch unit in "kick-start" mode, +// by touching two adjacent cache lines every 8 lines of each page, in four slices. +// Because the source may be unaligned, we use byte loads to touch. +// rdx = length remaining (including this chunk) +// rdi = ptr to start of dest chunk +// rsi = ptr to start of source chunk +// r8d = chunk length (multiples of pages, less than 2**32) +// ecx = scratch reg used to read a byte of each cache line +// eax = chunk offset + + .align 4,0x90 // 16-byte align inner loops +LTouchLoop: + movzb (%rsi,%rax),%ecx // touch line 0, 2, 4, or 6 of page + movzb 1*64(%rsi,%rax),%ecx // touch line 1, 3, 5, or 7 + movzb 8*64(%rsi,%rax),%ecx // touch line 8, 10, 12, or 14 + movzb 9*64(%rsi,%rax),%ecx // etc + + movzb 16*64(%rsi,%rax),%ecx + movzb 17*64(%rsi,%rax),%ecx + movzb 24*64(%rsi,%rax),%ecx + movzb 25*64(%rsi,%rax),%ecx + + movzb 32*64(%rsi,%rax),%ecx + movzb 33*64(%rsi,%rax),%ecx + movzb 40*64(%rsi,%rax),%ecx + movzb 41*64(%rsi,%rax),%ecx + + movzb 48*64(%rsi,%rax),%ecx + movzb 49*64(%rsi,%rax),%ecx + movzb 56*64(%rsi,%rax),%ecx + movzb 57*64(%rsi,%rax),%ecx + + subl $-128,%eax // next slice of page (adding 128 w 8-bit immediate) + testl $512,%eax // done with this page? + jz LTouchLoop // no, next of four slices + addl $(4096-512),%eax // move on to next page + cmpl %eax,%r8d // done with this chunk? + jnz LTouchLoop // no, do next page + +// The chunk has been pre-fetched, now copy it using non-temporal stores. +// There are two copy loops, depending on whether the source is 16-byte aligned +// or not. + + movl %r8d,%ecx // copy chunk size to a reg that doesn't use REX prefix + addq %rcx,%rsi // increment ptrs by chunk length + addq %rcx,%rdi + subq %rcx,%rdx // adjust remaining length + negq %rcx // prepare loop index (counts up to 0) + testl $15,%esi // is source 16-byte aligned? 
+ jnz LVeryLongUnaligned // no + jmp LVeryLongAligned + + .align 4,0x90 // 16-byte align inner loops +LVeryLongAligned: // aligned loop over 128-bytes + movdqa (%rsi,%rcx),%xmm0 + movdqa 16(%rsi,%rcx),%xmm1 + movdqa 32(%rsi,%rcx),%xmm2 + movdqa 48(%rsi,%rcx),%xmm3 + movdqa 64(%rsi,%rcx),%xmm4 + movdqa 80(%rsi,%rcx),%xmm5 + movdqa 96(%rsi,%rcx),%xmm6 + movdqa 112(%rsi,%rcx),%xmm7 + + movntdq %xmm0,(%rdi,%rcx) + movntdq %xmm1,16(%rdi,%rcx) + movntdq %xmm2,32(%rdi,%rcx) + movntdq %xmm3,48(%rdi,%rcx) + movntdq %xmm4,64(%rdi,%rcx) + movntdq %xmm5,80(%rdi,%rcx) + movntdq %xmm6,96(%rdi,%rcx) + movntdq %xmm7,112(%rdi,%rcx) + + subq $-128,%rcx // add 128 with an 8-bit immediate + jnz LVeryLongAligned + jmp LVeryLongChunkEnd + + .align 4,0x90 // 16-byte align inner loops +LVeryLongUnaligned: // unaligned loop over 128-bytes + movdqu (%rsi,%rcx),%xmm0 + movdqu 16(%rsi,%rcx),%xmm1 + movdqu 32(%rsi,%rcx),%xmm2 + movdqu 48(%rsi,%rcx),%xmm3 + movdqu 64(%rsi,%rcx),%xmm4 + movdqu 80(%rsi,%rcx),%xmm5 + movdqu 96(%rsi,%rcx),%xmm6 + movdqu 112(%rsi,%rcx),%xmm7 + + movntdq %xmm0,(%rdi,%rcx) + movntdq %xmm1,16(%rdi,%rcx) + movntdq %xmm2,32(%rdi,%rcx) + movntdq %xmm3,48(%rdi,%rcx) + movntdq %xmm4,64(%rdi,%rcx) + movntdq %xmm5,80(%rdi,%rcx) + movntdq %xmm6,96(%rdi,%rcx) + movntdq %xmm7,112(%rdi,%rcx) + + subq $-128,%rcx // add 128 with an 8-bit immediate + jnz LVeryLongUnaligned + +LVeryLongChunkEnd: + cmpq $4096,%rdx // at least another page to go? + jae LBigChunkLoop // yes + +// Done. Call memcpy() again to handle the 0-4095 bytes at the end. +// We still have the args in the right registers: +// rdi = destination ptr +// rsi = source ptr +// rdx = length remaining (0..4095) + + sfence // required by non-temporal stores + testl %edx,%edx // anything left to copy? 
+ jz 1f + movq $_COMM_PAGE_32_TO_64(_COMM_PAGE_MEMCPY),%rax + call *%rax +1: + popq %rbp // restore frame ptr + ret + + /* always match for now, as commpage_stuff_routine() will panic if no match */ + COMMPAGE_DESCRIPTOR(longcopy_sse4_64, _COMM_PAGE_LONGCOPY, 0 ,0) diff --git a/osfmk/i386/commpage/memset_pattern_sse3.s b/osfmk/i386/commpage/memset_pattern_sse3.s new file mode 100644 index 000000000..58fbe9fc6 --- /dev/null +++ b/osfmk/i386/commpage/memset_pattern_sse3.s @@ -0,0 +1,179 @@ +/* + * Copyright (c) 2005 Apple Computer, Inc. All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * The contents of this file constitute Original Code as defined in and + * are subject to the Apple Public Source License Version 1.1 (the + * "License"). You may not use this file except in compliance with the + * License. Please obtain a copy of the License at + * http://www.apple.com/publicsource and read it before using this file. + * + * This Original Code and all software distributed under the License are + * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the + * License for the specific language governing rights and limitations + * under the License. + * + * @APPLE_LICENSE_HEADER_END@ + */ + +#include +#include + +/* The common path for nonzero memset and the memset_pattern routines, + * tuned for Pentium-M class processors with SSE3 and 64-byte cache lines. + * This is used by the following functions: + * + * void *memset(void *b, int c, size_t len); // when c!=0 + * void memset_pattern4(void *b, const void *c4, size_t len); + * void memset_pattern8(void *b, const void *c8, size_t len); + * void memset_pattern16(void *b, const void *c16, size_t len); + * + * Note bzero() and memset() of 0 are handled separately. 
+ */ + +#define kShort 63 +#define kVeryLong (1024*1024) + +// Initial entry from Libc with parameters passed in registers. Although we +// correctly handle misaligned ptrs and short operands, they are inefficient. +// Therefore our caller should filter out short operands and exploit local +// knowledge (ie, original pattern length) to align the ptr if possible. +// When called, we expect: +// %edi = ptr to memory to set (not necessarily aligned) +// %edx = length (may be short or even 0) +// %xmm0 = the pattern to store +// Return conditions: +// %eax, %edi, %esi, %ecx, and %edx all trashed + + .text + .align 5, 0x90 +Lmemset_pattern_sse3: + cmpl $(kShort),%edx // long enough to bother aligning? + ja LNotShort // yes + jmp LShort // no + +// Here for short operands or the end of long ones. +// %edx = length +// %edi = ptr (may not be aligned) +// %xmm0 = pattern + +LUnalignedStore16: + movdqu %xmm0,(%edi) // stuff in another 16 bytes + subl $16,%edx + addl $16,%edi +LShort: + cmpl $16,%edx // room for another vector? + jge LUnalignedStore16 // yes +LLessThan16: // here at end of copy with < 16 bytes remaining + test $8,%dl // 8-byte store required? + jz 2f // no + movq %xmm0,(%edi) // pack in 8 low bytes + psrldq $8,%xmm0 // then shift vector down 8 bytes + addl $8,%edi +2: + test $4,%dl // 4-byte store required? + jz 3f // no + movd %xmm0,(%edi) // pack in 4 low bytes + psrldq $4,%xmm0 // then shift vector down 4 bytes + addl $4,%edi +3: + andl $3,%edx // more to go? + jz 5f // no + movd %xmm0,%eax // move remainders out into %eax +4: // loop on up to three bytes + movb %al,(%edi) // pack in next byte + shrl $8,%eax // shift next byte into position + inc %edi + dec %edx + jnz 4b +5: ret + +// Long enough to justify aligning ptr. Note that we have to rotate the +// pattern to account for any alignment. We do this by doing two unaligned +// stores, and then an aligned load from the middle of the two stores.
+// This will stall on store forwarding alignment mismatch, and the unaligned +// stores can be pretty slow too, but the alternatives aren't any better. +// Fortunately, in most cases our caller has already aligned the ptr. +// %edx = length (> kShort) +// %edi = ptr (may not be aligned) +// %xmm0 = pattern + +LNotShort: + movl %edi,%ecx // copy dest ptr + negl %ecx + andl $15,%ecx // mask down to #bytes to 16-byte align + jz LAligned // skip if already aligned + movdqu %xmm0,(%edi) // store 16 unaligned bytes + movdqu %xmm0,16(%edi) // and 16 more, to be sure we have an aligned chunk + addl %ecx,%edi // now point to the aligned chunk + subl %ecx,%edx // adjust remaining count + movdqa (%edi),%xmm0 // get the rotated pattern (probably stalling) + addl $16,%edi // skip past the aligned chunk + subl $16,%edx + +// Set up for 64-byte loops. +// %edx = length remaining +// %edi = ptr (aligned) +// %xmm0 = rotated pattern + +LAligned: + movl %edx,%ecx // copy length remaining + andl $63,%edx // mask down to residual length (0..63) + andl $-64,%ecx // %ecx <- #bytes we will store in by-64 loop + jz LNoMoreChunks // no 64-byte chunks + addl %ecx,%edi // increment ptr by length to move + cmpl $(kVeryLong),%ecx // long enough to justify non-temporal stores? + jge LVeryLong // yes + negl %ecx // negate length to move + jmp 1f + +// Loop over 64-byte chunks, storing into cache. + + .align 4,0x90 // keep inner loops 16-byte aligned +1: + movdqa %xmm0,(%edi,%ecx) + movdqa %xmm0,16(%edi,%ecx) + movdqa %xmm0,32(%edi,%ecx) + movdqa %xmm0,48(%edi,%ecx) + addl $64,%ecx + jne 1b + + jmp LNoMoreChunks + +// Very long operands: use non-temporal stores to bypass cache.
+ +LVeryLong: + negl %ecx // negate length to move + jmp 1f + + .align 4,0x90 // keep inner loops 16-byte aligned +1: + movntdq %xmm0,(%edi,%ecx) + movntdq %xmm0,16(%edi,%ecx) + movntdq %xmm0,32(%edi,%ecx) + movntdq %xmm0,48(%edi,%ecx) + addl $64,%ecx + jne 1b + + sfence // required by non-temporal stores + jmp LNoMoreChunks + +// Handle leftovers: loop by 16. +// %edx = length remaining (<64) +// %edi = ptr (aligned) +// %xmm0 = rotated pattern + +LLoopBy16: + movdqa %xmm0,(%edi) // pack in 16 more bytes + subl $16,%edx // decrement count + addl $16,%edi // increment ptr +LNoMoreChunks: + cmpl $16,%edx // more to go? + jge LLoopBy16 // yes + jmp LLessThan16 // handle up to 15 remaining bytes + + COMMPAGE_DESCRIPTOR(memset_pattern_sse3,_COMM_PAGE_MEMSET_PATTERN,kHasSSE2,0) diff --git a/osfmk/i386/commpage/memset_pattern_sse3_64.s b/osfmk/i386/commpage/memset_pattern_sse3_64.s new file mode 100644 index 000000000..ab2f42fc9 --- /dev/null +++ b/osfmk/i386/commpage/memset_pattern_sse3_64.s @@ -0,0 +1,181 @@ +/* + * Copyright (c) 2006 Apple Computer, Inc. All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * The contents of this file constitute Original Code as defined in and + * are subject to the Apple Public Source License Version 1.1 (the + * "License"). You may not use this file except in compliance with the + * License. Please obtain a copy of the License at + * http://www.apple.com/publicsource and read it before using this file. + * + * This Original Code and all software distributed under the License are + * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the + * License for the specific language governing rights and limitations + * under the License. 
+ * + * @APPLE_LICENSE_HEADER_END@ + */ + +#include +#include + +/* The common path for nonzero memset and the memset_pattern routines, + * tuned for Pentium-M class processors with SSE3 and 64-byte cache lines. + * This is the 64-bit version. It is used by the following functions: + * + * void *memset(void *b, int c, size_t len); // when c!=0 + * void memset_pattern4(void *b, const void *c4, size_t len); + * void memset_pattern8(void *b, const void *c8, size_t len); + * void memset_pattern16(void *b, const void *c16, size_t len); + * + * Note bzero() and memset() of 0 are handled separately. + */ + +#define kShort 63 +#define kVeryLong (1024*1024) + +// Initial entry from Libc with parameters passed in registers. Although we +// correctly handle misaligned ptrs and short operands, they are inefficient. +// Therefore our caller should filter out short operands and exploit local +// knowledge (ie, original pattern length) to align the ptr if possible. +// When called, we expect: +// %rdi = ptr to memory to set (not necessarily aligned) +// %rdx = length (may be short or even 0) +// %xmm0 = the pattern to store +// Return conditions: +// %rax, %rdi, %rsi, %rcx, and %rdx all trashed +// we preserve %r8, %r9, %r10, and %r11 + + .text + .align 5, 0x90 + .code64 +Lmemset_pattern_sse3_64: + cmpq $(kShort),%rdx // long enough to bother aligning? + ja LNotShort // yes + jmp LShort // no + +// Here for short operands or the end of long ones. +// %rdx = length (<= kShort) +// %rdi = ptr (may not be aligned) +// %xmm0 = pattern + +LUnalignedStore16: + movdqu %xmm0,(%rdi) // stuff in another 16 bytes + subl $16,%edx + addq $16,%rdi +LShort: + cmpl $16,%edx // room for another vector? + jge LUnalignedStore16 // yes +LLessThan16: // here at end of copy with < 16 bytes remaining + test $8,%dl // 8-byte store required?
+ jz 2f // no + movq %xmm0,(%rdi) // pack in 8 low bytes + psrldq $8,%xmm0 // then shift vector down 8 bytes + addq $8,%rdi +2: + test $4,%dl // 4-byte store required? + jz 3f // no + movd %xmm0,(%rdi) // pack in 4 low bytes + psrldq $4,%xmm0 // then shift vector down 4 bytes + addq $4,%rdi +3: + andl $3,%edx // more to go? + jz 5f // no + movd %xmm0,%eax // move remainders out into %eax +4: // loop on up to three bytes + movb %al,(%rdi) // pack in next byte + shrl $8,%eax // shift next byte into position + incq %rdi + dec %edx + jnz 4b +5: ret + +// Long enough to justify aligning ptr. Note that we have to rotate the +// pattern to account for any alignment. We do this by doing two unaligned +// stores, and then an aligned load from the middle of the two stores. +// This will stall on store forwarding alignment mismatch, and the unaligned +// stores can be pretty slow too, but the alternatives aren't any better. +// Fortunately, in most cases our caller has already aligned the ptr. +// %rdx = length (> kShort) +// %rdi = ptr (may not be aligned) +// %xmm0 = pattern + +LNotShort: + movl %edi,%ecx // copy low bits of dest ptr + negl %ecx + andl $15,%ecx // mask down to #bytes to 16-byte align + jz LAligned // skip if already aligned + movdqu %xmm0,(%rdi) // store 16 unaligned bytes + movdqu %xmm0,16(%rdi) // and 16 more, to be sure we have an aligned chunk + addq %rcx,%rdi // now point to the aligned chunk + subq %rcx,%rdx // adjust remaining count + movdqa (%rdi),%xmm0 // get the rotated pattern (probably stalling) + addq $16,%rdi // skip past the aligned chunk + subq $16,%rdx + +// Set up for 64-byte loops. 
+// %rdx = length remaining +// %rdi = ptr (aligned) +// %xmm0 = rotated pattern + +LAligned: + movq %rdx,%rcx // copy length remaining + andl $63,%edx // mask down to residual length (0..63) + andq $-64,%rcx // %rcx <- #bytes we will store in by-64 loop + jz LNoMoreChunks // no 64-byte chunks + addq %rcx,%rdi // increment ptr by length to move + cmpq $(kVeryLong),%rcx // long enough to justify non-temporal stores? + jge LVeryLong // yes + negq %rcx // negate length to move + jmp 1f + +// Loop over 64-byte chunks, storing into cache. + + .align 4,0x90 // keep inner loops 16-byte aligned +1: + movdqa %xmm0,(%rdi,%rcx) + movdqa %xmm0,16(%rdi,%rcx) + movdqa %xmm0,32(%rdi,%rcx) + movdqa %xmm0,48(%rdi,%rcx) + addq $64,%rcx + jne 1b + + jmp LNoMoreChunks + +// Very long operands: use non-temporal stores to bypass cache. + +LVeryLong: + negq %rcx // negate length to move + jmp 1f + + .align 4,0x90 // keep inner loops 16-byte aligned +1: + movntdq %xmm0,(%rdi,%rcx) + movntdq %xmm0,16(%rdi,%rcx) + movntdq %xmm0,32(%rdi,%rcx) + movntdq %xmm0,48(%rdi,%rcx) + addq $64,%rcx + jne 1b + + sfence // required by non-temporal stores + jmp LNoMoreChunks + +// Handle leftovers: loop by 16. +// %edx = length remaining (<64) +// %rdi = ptr (aligned) +// %xmm0 = rotated pattern + +LLoopBy16: + movdqa %xmm0,(%rdi) // pack in 16 more bytes + subl $16,%edx // decrement count + addq $16,%rdi // increment ptr +LNoMoreChunks: + cmpl $16,%edx // more to go? + jge LLoopBy16 // yes + jmp LLessThan16 // handle up to 15 remaining bytes + + COMMPAGE_DESCRIPTOR(memset_pattern_sse3_64,_COMM_PAGE_MEMSET_PATTERN,kHasSSE3,0) diff --git a/osfmk/i386/commpage/pthreads.s b/osfmk/i386/commpage/pthreads.s index ccf4d9eca..4aa2675b4 100644 --- a/osfmk/i386/commpage/pthreads.s +++ b/osfmk/i386/commpage/pthreads.s @@ -1,5 +1,5 @@ /* - * Copyright (c) 2003 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2003-2006 Apple Computer, Inc. All rights reserved.
* * @APPLE_LICENSE_HEADER_START@ * @@ -24,21 +24,41 @@ #include #include -#define _PTHREAD_TSD_OFFSET 0x48 +#define _PTHREAD_TSD_OFFSET32 0x48 +#define _PTHREAD_TSD_OFFSET64 0x60 + +/* These routines do not need to be on the commpage on Intel. They are for now + * to avoid revlock, but the code should move to Libc, and we should eventually remove + * these. + */ .text .align 2, 0x90 Lpthread_getspecific: movl 4(%esp), %eax - movl %gs:_PTHREAD_TSD_OFFSET(,%eax,4), %eax + movl %gs:_PTHREAD_TSD_OFFSET32(,%eax,4), %eax ret COMMPAGE_DESCRIPTOR(pthread_getspecific,_COMM_PAGE_PTHREAD_GETSPECIFIC,0,0) Lpthread_self: - movl 4(%esp), %eax - movl %gs:_PTHREAD_TSD_OFFSET, %eax + movl %gs:_PTHREAD_TSD_OFFSET32, %eax ret COMMPAGE_DESCRIPTOR(pthread_self,_COMM_PAGE_PTHREAD_SELF,0,0) + +/* the 64-bit versions: */ + + .code64 +Lpthread_getspecific_64: + movq %gs:_PTHREAD_TSD_OFFSET64(,%rdi,8), %rax + ret + + COMMPAGE_DESCRIPTOR(pthread_getspecific_64,_COMM_PAGE_PTHREAD_GETSPECIFIC,0,0) + +Lpthread_self_64: + movq %gs:_PTHREAD_TSD_OFFSET64, %rax + ret + + COMMPAGE_DESCRIPTOR(pthread_self_64,_COMM_PAGE_PTHREAD_SELF,0,0) diff --git a/osfmk/i386/commpage/spinlocks.s b/osfmk/i386/commpage/spinlocks.s index e2cb71207..f8d6329fa 100644 --- a/osfmk/i386/commpage/spinlocks.s +++ b/osfmk/i386/commpage/spinlocks.s @@ -1,5 +1,5 @@ /* - * Copyright (c) 2003 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2003-2006 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -23,23 +23,9 @@ #include #include #include +#include + + -/* - * We need a relative branch within the comm page, and don't want the linker - * to relocate it, so we have to hand-code the instructions. LEN is to account - * for the length of a .long, since the jmp is relative to the next instruction.
- */ - -#define JNZ .byte 0x0f, 0x85; .long -#define JMP .byte 0xe9; .long -#define LEN 4 - -/* - * Branch prediction prefixes - */ - -#define LIKELY .byte 0x3e -#define UNLIKELY .byte 0x2e #define MP_SPIN_TRIES 1024 @@ -57,6 +43,7 @@ Lspin_lock_try_up: COMMPAGE_DESCRIPTOR(spin_lock_try_up,_COMM_PAGE_SPINLOCK_TRY,kUP,0) + .align 4, 0x90 Lspin_lock_try_mp: movl 4(%esp), %ecx @@ -70,21 +57,29 @@ Lspin_lock_try_mp: COMMPAGE_DESCRIPTOR(spin_lock_try_mp,_COMM_PAGE_SPINLOCK_TRY,0,kUP) -.set Lrelinquish_off, _COMM_PAGE_RELINQUISH - _COMM_PAGE_SPINLOCK_LOCK .align 4, 0x90 Lspin_lock_up: movl 4(%esp), %ecx xorl %eax, %eax -.set Lretry, . - Lspin_lock_up orl $-1, %edx cmpxchgl %edx, (%ecx) - UNLIKELY - JNZ Lrelinquish_off - . + Lspin_lock_up - LEN + jnz,pn 1f /* predict not taken */ ret +1: + /* failed to get lock so relinquish the processor immediately on UP */ + pushl $1 /* 1 ms */ + pushl $1 /* SWITCH_OPTION_DEPRESS */ + pushl $0 /* THREAD_NULL */ + pushl $0 /* push dummy stack ret addr */ + movl $-61,%eax /* SYSCALL_THREAD_SWITCH */ + int $(MACH_INT) + addl $16, %esp /* adjust stack*/ + jmp Lspin_lock_up COMMPAGE_DESCRIPTOR(spin_lock_up,_COMM_PAGE_SPINLOCK_LOCK,kUP,0) + .align 4, 0x90 Lspin_lock_mp: movl 4(%esp), %ecx @@ -93,8 +88,7 @@ Lspin_lock_mp: orl $-1, %edx lock cmpxchgl %edx, (%ecx) - UNLIKELY - jnz 1f + jnz,pn 1f /* predict not taken */ ret 1: xorl %eax, %eax @@ -102,15 +96,22 @@ Lspin_lock_mp: 2: pause cmpl %eax, (%ecx) - LIKELY - jz 0b + jz,pt 0b /* favor success and slow down spin loop */ decl %edx - LIKELY - jnz 2b - JMP Lrelinquish_off - . 
+ Lspin_lock_mp - LEN + jnz,pn 2b /* slow down spin loop with a mispredict */ + /* failed to get lock after spinning so relinquish */ + pushl $1 /* 1 ms */ + pushl $1 /* SWITCH_OPTION_DEPRESS */ + pushl $0 /* THREAD_NULL */ + pushl $0 /* push dummy stack ret addr */ + movl $-61,%eax /* SYSCALL_THREAD_SWITCH */ + int $(MACH_INT) + addl $16, %esp /* adjust stack*/ + jmp Lspin_lock_mp COMMPAGE_DESCRIPTOR(spin_lock_mp,_COMM_PAGE_SPINLOCK_LOCK,0,kUP) + .align 4, 0x90 Lspin_unlock: movl 4(%esp), %ecx @@ -119,16 +120,93 @@ Lspin_unlock: COMMPAGE_DESCRIPTOR(spin_unlock,_COMM_PAGE_SPINLOCK_UNLOCK,0,0) + +/* ============================ 64-bit versions follow ===================== */ + + + .text + .code64 .align 4, 0x90 -Lrelinquish: /* relinquish the processor */ - pushl $1 /* 1 ms */ - pushl $1 /* SWITCH_OPTION_DEPRESS */ - pushl $0 /* THREAD_NULL */ - pushl $0 /* push dummy stack ret addr */ - movl $-61, %eax /* syscall_thread_switch */ - lcall $7, $0 - addl $16, %esp /* adjust stack*/ - xorl %eax, %eax /* set %eax to 0 again */ - JMP Lretry - Lrelinquish_off - . 
+ Lrelinquish - LEN - COMMPAGE_DESCRIPTOR(relinquish,_COMM_PAGE_RELINQUISH,0,0) +Lspin_lock_try_up_64: + xorl %eax, %eax + orl $-1, %edx + cmpxchgl %edx, (%rdi) + setz %dl + movzbl %dl, %eax + ret + + COMMPAGE_DESCRIPTOR(spin_lock_try_up_64,_COMM_PAGE_SPINLOCK_TRY,kUP,0) + + + .align 4, 0x90 +Lspin_lock_try_mp_64: + xorl %eax, %eax + orl $-1, %edx + lock + cmpxchgl %edx, (%rdi) + setz %dl + movzbl %dl, %eax + ret + + COMMPAGE_DESCRIPTOR(spin_lock_try_mp_64,_COMM_PAGE_SPINLOCK_TRY,0,kUP) + + + .align 4, 0x90 +Lspin_lock_up_64: + movq %rdi,%r8 +0: + xorl %eax, %eax + orl $-1, %edx + cmpxchgl %edx, (%r8) + jnz,pn 1f /* predict not taken */ + ret +1: + /* failed to get lock so relinquish the processor immediately on UP */ + xorl %edi,%edi /* THREAD_NULL */ + movl $1,%esi /* SWITCH_OPTION_DEPRESS */ + movl $1,%edx /* 1 ms */ + movl $(SYSCALL_CONSTRUCT_MACH(61)),%eax /* 61 = thread_switch */ + syscall + jmp 0b + + COMMPAGE_DESCRIPTOR(spin_lock_up_64,_COMM_PAGE_SPINLOCK_LOCK,kUP,0) + + + + .align 4, 0x90 +Lspin_lock_mp_64: + movq %rdi,%r8 +0: + xorl %eax, %eax + orl $-1, %edx + lock + cmpxchgl %edx, (%r8) + jnz,pn 1f /* predict not taken */ + ret +1: + xorl %eax, %eax + movl $(MP_SPIN_TRIES), %edx +2: /* spin for awhile before relinquish */ + pause + cmpl %eax, (%r8) + jz 0b + decl %edx + jnz 2b + /* failed to get lock after spinning so relinquish */ + xorl %edi,%edi /* THREAD_NULL */ + movl $1,%esi /* SWITCH_OPTION_DEPRESS */ + movl $1,%edx /* 1 ms */ + movl $(SYSCALL_CONSTRUCT_MACH(61)),%eax /* 61 = thread_switch */ + syscall + jmp 0b + + COMMPAGE_DESCRIPTOR(spin_lock_mp_64,_COMM_PAGE_SPINLOCK_LOCK,0,kUP) + + + .align 4, 0x90 +Lspin_unlock_64: + movl $0, (%rdi) + ret + + COMMPAGE_DESCRIPTOR(spin_unlock_64,_COMM_PAGE_SPINLOCK_UNLOCK,0,0) diff --git a/osfmk/i386/cpu.c b/osfmk/i386/cpu.c index f16989952..7d19205d8 100644 --- a/osfmk/i386/cpu.c +++ b/osfmk/i386/cpu.c @@ -35,8 +35,9 @@ #include #include #include +#include #include - +#include "cpuid.h" struct processor 
processor_master; @@ -80,6 +81,8 @@ cpu_sleep(void) { cpu_data_t *proc_info = current_cpu_datap(); + proc_info->cpu_running = FALSE; + PE_cpu_machine_quiesce(proc_info->cpu_id); cpu_thread_halt(); @@ -90,14 +93,13 @@ cpu_init(void) { cpu_data_t *cdp = current_cpu_datap(); -#ifdef MACH_BSD - /* FIXME */ - cdp->cpu_type = CPU_TYPE_I386; - cdp->cpu_subtype = CPU_SUBTYPE_PENTPRO; -#else - cdp->cpu_type = cpuid_cputype(0); - cdp->cpu_subtype = CPU_SUBTYPE_AT386; -#endif + /* be sure cpuid is initialized */ + cpuid_set_info(); + + /* and allow it to be authoritative */ + cdp->cpu_type = cpuid_cputype(); + cdp->cpu_subtype = cpuid_cpusubtype(); + cdp->cpu_running = TRUE; } @@ -130,10 +132,11 @@ void cpu_machine_init( void) { - int cpu; + cpu_data_t *cdp = current_cpu_datap(); - cpu = get_cpu_number(); - PE_cpu_machine_init(cpu_datap(cpu)->cpu_id, TRUE); + PE_cpu_machine_init(cdp->cpu_id, !cdp->cpu_boot_complete); + cdp->cpu_boot_complete = TRUE; + cdp->cpu_running = TRUE; #if 0 if (cpu_datap(cpu)->hibernate) { diff --git a/osfmk/i386/cpu_capabilities.h b/osfmk/i386/cpu_capabilities.h index 08d4f0bea..6270145f1 100644 --- a/osfmk/i386/cpu_capabilities.h +++ b/osfmk/i386/cpu_capabilities.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2003-2005 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2003-2006 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -29,10 +29,7 @@ #endif /* - * This is the authoritative way to determine from user mode what - * implementation-specific processor features are available. * This API only supported for Apple internal use. 
- * */ /* Bit definitions for _cpu_capabilities: */ @@ -41,15 +38,18 @@ #define kHasSSE 0x00000002 #define kHasSSE2 0x00000004 #define kHasSSE3 0x00000008 -#define kCache32 0x00000010 // cache line size is 32 bytes +#define kCache32 0x00000010 /* cache line size is 32 bytes */ #define kCache64 0x00000020 #define kCache128 0x00000040 -#define kFastThreadLocalStorage 0x00000080 // TLS ptr is kept in a user-mode-readable register +#define kFastThreadLocalStorage 0x00000080 /* TLS ptr is kept in a user-mode-readable register */ +#define kHasSupplementalSSE3 0x00000100 +#define kHasMNI kHasSupplementalSSE3 +#define k64Bit 0x00000200 /* processor supports EM64T (not what mode you're running in) */ -#define kUP 0x00008000 // set if (kNumCPUs == 1) -#define kNumCPUs 0x00FF0000 // number of CPUs (see _NumCPUs() below) +#define kUP 0x00008000 /* set if (kNumCPUs == 1) */ +#define kNumCPUs 0x00FF0000 /* number of CPUs (see _NumCPUs() below) */ -#define kNumCPUsShift 16 // see _NumCPUs() below +#define kNumCPUsShift 16 /* see _NumCPUs() below */ #ifndef __ASSEMBLER__ #include @@ -70,92 +70,137 @@ int _NumCPUs( void ) /* * The shared kernel/user "comm page(s)": * - * The last eight pages of every address space are reserved for the kernel/user - * "comm area". During system initialization, the kernel populates the comm page with + * The last several pages of every address space are reserved for the kernel/user + * "comm area". During system initialization, the kernel populates the comm pages with * code customized for the particular processor and platform. * - * Because Mach VM cannot map the last page of an address space, the max length of - * the comm area is seven pages. + * Because Mach VM cannot map the last page of an address space, we don't use it. 
*/ -#define _COMM_PAGE_AREA_LENGTH (19*4096) - // reserved length of entire comm area -#define _COMM_PAGE_BASE_ADDRESS (-20*4096) - // VM_MAX_ADDRESS-_COMM_PAGE_AREA_LENGTH -#define _COMM_PAGE_START_ADDRESS (-16*4096) - // VM_MAX_ADDRESS-_COMM_PAGE_AREA_LENGTH -#define _COMM_PAGE_SIGS_OFFSET 0x8000 - // offset to routine signatures +#define _COMM_PAGE32_AREA_LENGTH ( 19 * 4096 ) /* reserved length of entire comm area */ +#define _COMM_PAGE32_BASE_ADDRESS ( -20 * 4096 ) /* base address of allocated memory */ +#define _COMM_PAGE32_START_ADDRESS ( -16 * 4096 ) /* address traditional commpage code starts on */ +#define _COMM_PAGE32_AREA_USED ( 19 * 4096 ) /* this is the amt actually allocated */ + +#define _COMM_PAGE64_AREA_LENGTH ( 2 * 1024 * 1024 ) /* reserved length of entire comm area (2MB) */ +#define _COMM_PAGE64_BASE_ADDRESS ( 0x00007fffffe00000ULL ) /* base address of allocated memory */ +#define _COMM_PAGE64_START_ADDRESS ( _COMM_PAGE64_BASE_ADDRESS ) /* address traditional commpage code starts on */ +#define _COMM_PAGE64_AREA_USED ( 2 * 4096 ) /* this is the amt actually populated */ + + +#if defined(__i386__) + +#define _COMM_PAGE_AREA_LENGTH _COMM_PAGE32_AREA_LENGTH +#define _COMM_PAGE_BASE_ADDRESS _COMM_PAGE32_BASE_ADDRESS +#define _COMM_PAGE_START_ADDRESS _COMM_PAGE32_START_ADDRESS +#define _COMM_PAGE_AREA_USED _COMM_PAGE32_AREA_USED +#define _COMM_PAGE_SIGS_OFFSET 0x8000 /* offset to routine signatures */ + +#elif defined(__x86_64__) + +#define _COMM_PAGE_AREA_LENGTH _COMM_PAGE64_AREA_LENGTH +#define _COMM_PAGE_BASE_ADDRESS _COMM_PAGE64_BASE_ADDRESS +#define _COMM_PAGE_START_ADDRESS _COMM_PAGE64_START_ADDRESS +#define _COMM_PAGE_AREA_USED _COMM_PAGE64_AREA_USED + +#else +#error architecture not supported +#endif /* data in the comm page */ -#define _COMM_PAGE_SIGNATURE (_COMM_PAGE_START_ADDRESS+0x000) // first few bytes are a signature -#define _COMM_PAGE_VERSION (_COMM_PAGE_START_ADDRESS+0x01E) // 16-bit version# -#define _COMM_PAGE_THIS_VERSION 3 // 
version of the commarea format +#define _COMM_PAGE_SIGNATURE (_COMM_PAGE_START_ADDRESS+0x000) /* first few bytes are a signature */ +#define _COMM_PAGE_VERSION (_COMM_PAGE_START_ADDRESS+0x01E) /* 16-bit version# */ +#define _COMM_PAGE_THIS_VERSION 6 /* version of the commarea format */ -#define _COMM_PAGE_CPU_CAPABILITIES (_COMM_PAGE_START_ADDRESS+0x020) // uint32_t _cpu_capabilities -#define _COMM_PAGE_NCPUS (_COMM_PAGE_START_ADDRESS+0x021) // uint8_t number of configured CPUs -#define _COMM_PAGE_VECTOR_FLAVOR (_COMM_PAGE_START_ADDRESS+0x024) // uint8_t SSE/SSE2/SSE3 -#define _COMM_PAGE_CACHE_LINESIZE (_COMM_PAGE_START_ADDRESS+0x026) // uint16_t cache line size - -#define _COMM_PAGE_UNUSED1 (_COMM_PAGE_START_ADDRESS+0x030) // 16 unused bytes - -#define _COMM_PAGE_2_TO_52 (_COMM_PAGE_START_ADDRESS+0x040) // double float constant 2**52 -#define _COMM_PAGE_10_TO_6 (_COMM_PAGE_START_ADDRESS+0x048) // double float constant 10**6 +#define _COMM_PAGE_CPU_CAPABILITIES (_COMM_PAGE_START_ADDRESS+0x020) /* uint32_t _cpu_capabilities */ +#define _COMM_PAGE_NCPUS (_COMM_PAGE_START_ADDRESS+0x022) /* uint8_t number of configured CPUs */ +#define _COMM_PAGE_CACHE_LINESIZE (_COMM_PAGE_START_ADDRESS+0x026) /* uint16_t cache line size */ + +#define _COMM_PAGE_UNUSED1 (_COMM_PAGE_START_ADDRESS+0x028) /* 24 unused bytes */ -#define _COMM_PAGE_UNUSED2 (_COMM_PAGE_START_ADDRESS+0x050) // 16 unused bytes +#if defined(__i386__) /* following are not defined in 64-bit */ +#define _COMM_PAGE_2_TO_52 (_COMM_PAGE_START_ADDRESS+0x040) /* double float constant 2**52 */ +#define _COMM_PAGE_10_TO_6 (_COMM_PAGE_START_ADDRESS+0x048) /* double float constant 10**6 */ +#else +#define _COMM_PAGE_UNUSED2 (_COMM_PAGE_START_ADDRESS+0x040) /* 16 unused bytes */ +#endif + +#define _COMM_PAGE_NT_TSC_BASE (_COMM_PAGE_START_ADDRESS+0x050) /* used by nanotime() */ +#define _COMM_PAGE_NT_SCALE (_COMM_PAGE_START_ADDRESS+0x058) /* used by nanotime() */ +#define _COMM_PAGE_NT_SHIFT (_COMM_PAGE_START_ADDRESS+0x05c) 
/* used by nanotime() */ +#define _COMM_PAGE_NT_NS_BASE (_COMM_PAGE_START_ADDRESS+0x060) /* used by nanotime() */ -#define _COMM_PAGE_TIMEBASE (_COMM_PAGE_START_ADDRESS+0x060) // used by gettimeofday() -#define _COMM_PAGE_TIMESTAMP (_COMM_PAGE_START_ADDRESS+0x068) // used by gettimeofday() -#define _COMM_PAGE_SEC_PER_TICK (_COMM_PAGE_START_ADDRESS+0x070) // used by gettimeofday() +#define _COMM_PAGE_TIMEBASE (_COMM_PAGE_START_ADDRESS+0x068) /* used by gettimeofday() */ +#define _COMM_PAGE_TIMESTAMP (_COMM_PAGE_START_ADDRESS+0x070) /* used by gettimeofday() */ +#define _COMM_PAGE_TIMEENABLE (_COMM_PAGE_START_ADDRESS+0x078) /* used by gettimeofday() */ + +#define _NT_TSC_BASE 0 /* offsets into nanotime data */ +#define _NT_SCALE 8 +#define _NT_SHIFT 12 +#define _NT_NS_BASE 16 +#define _TIMEBASE 0 /* offsets into gettimeofday data */ +#define _TIMESTAMP 8 +#define _TIMEENABLE 16 - /* jump table (bla to this address, which may be a branch to the actual code somewhere else) */ + /* jump table (jmp to this address, which may be a branch to the actual code somewhere else) */ /* When new jump table entries are added, corresponding symbols should be added below */ -#define _COMM_PAGE_COMPARE_AND_SWAP32 (_COMM_PAGE_START_ADDRESS+0x080) // compare-and-swap word -#define _COMM_PAGE_COMPARE_AND_SWAP64 (_COMM_PAGE_START_ADDRESS+0x0c0) // compare-and-swap doubleword -#define _COMM_PAGE_ENQUEUE (_COMM_PAGE_START_ADDRESS+0x100) // enqueue -#define _COMM_PAGE_DEQUEUE (_COMM_PAGE_START_ADDRESS+0x140) // dequeue -#define _COMM_PAGE_MEMORY_BARRIER (_COMM_PAGE_START_ADDRESS+0x180) // add atomic doubleword -#define _COMM_PAGE_ATOMIC_ADD32 (_COMM_PAGE_START_ADDRESS+0x1a0) // add atomic word -#define _COMM_PAGE_ATOMIC_ADD64 (_COMM_PAGE_START_ADDRESS+0x1c0) // add atomic doubleword - -#define _COMM_PAGE_NANOTIME_INFO (_COMM_PAGE_START_ADDRESS+0x1e0) // 32 bytes used by nanotime() +#define _COMM_PAGE_COMPARE_AND_SWAP32 (_COMM_PAGE_START_ADDRESS+0x080) /* compare-and-swap word */ +#define 
_COMM_PAGE_COMPARE_AND_SWAP64 (_COMM_PAGE_START_ADDRESS+0x0c0) /* compare-and-swap doubleword */ +#define _COMM_PAGE_ENQUEUE (_COMM_PAGE_START_ADDRESS+0x100) /* enqueue */ +#define _COMM_PAGE_DEQUEUE (_COMM_PAGE_START_ADDRESS+0x140) /* dequeue */ +#define _COMM_PAGE_MEMORY_BARRIER (_COMM_PAGE_START_ADDRESS+0x180) /* memory barrier */ +#define _COMM_PAGE_ATOMIC_ADD32 (_COMM_PAGE_START_ADDRESS+0x1a0) /* add atomic word */ +#define _COMM_PAGE_ATOMIC_ADD64 (_COMM_PAGE_START_ADDRESS+0x1c0) /* add atomic doubleword */ + +#define _COMM_PAGE_UNUSED3 (_COMM_PAGE_START_ADDRESS+0x1e0) /* 32 unused bytes */ + +#define _COMM_PAGE_ABSOLUTE_TIME (_COMM_PAGE_START_ADDRESS+0x200) /* mach_absolute_time() */ +#define _COMM_PAGE_SPINLOCK_TRY (_COMM_PAGE_START_ADDRESS+0x220) /* spinlock_try() */ +#define _COMM_PAGE_SPINLOCK_LOCK (_COMM_PAGE_START_ADDRESS+0x260) /* spinlock_lock() */ +#define _COMM_PAGE_SPINLOCK_UNLOCK (_COMM_PAGE_START_ADDRESS+0x2a0) /* spinlock_unlock() */ +#define _COMM_PAGE_PTHREAD_GETSPECIFIC (_COMM_PAGE_START_ADDRESS+0x2c0) /* pthread_getspecific() */ +#define _COMM_PAGE_GETTIMEOFDAY (_COMM_PAGE_START_ADDRESS+0x2e0) /* used by gettimeofday() */ +#define _COMM_PAGE_FLUSH_DCACHE (_COMM_PAGE_START_ADDRESS+0x4e0) /* sys_dcache_flush() */ +#define _COMM_PAGE_FLUSH_ICACHE (_COMM_PAGE_START_ADDRESS+0x520) /* sys_icache_invalidate() */ +#define _COMM_PAGE_PTHREAD_SELF (_COMM_PAGE_START_ADDRESS+0x580) /* pthread_self() */ + +#define _COMM_PAGE_UNUSED4 (_COMM_PAGE_START_ADDRESS+0x5a0) /* 32 unused bytes */ + +#define _COMM_PAGE_RELINQUISH (_COMM_PAGE_START_ADDRESS+0x5c0) /* used by spinlocks */ +#define _COMM_PAGE_BTS (_COMM_PAGE_START_ADDRESS+0x5e0) /* bit test-and-set */ +#define _COMM_PAGE_BTC (_COMM_PAGE_START_ADDRESS+0x5f0) /* bit test-and-clear */ -#define _COMM_PAGE_ABSOLUTE_TIME (_COMM_PAGE_START_ADDRESS+0x200) // mach_absolute_time() -#define _COMM_PAGE_SPINLOCK_TRY (_COMM_PAGE_START_ADDRESS+0x220) // spinlock_try() -#define _COMM_PAGE_SPINLOCK_LOCK 
(_COMM_PAGE_START_ADDRESS+0x260) // spinlock_lock() -#define _COMM_PAGE_SPINLOCK_UNLOCK (_COMM_PAGE_START_ADDRESS+0x2a0) // spinlock_unlock() -#define _COMM_PAGE_PTHREAD_GETSPECIFIC (_COMM_PAGE_START_ADDRESS+0x2c0) // pthread_getspecific() -#define _COMM_PAGE_GETTIMEOFDAY (_COMM_PAGE_START_ADDRESS+0x2e0) // used by gettimeofday() -#define _COMM_PAGE_FLUSH_DCACHE (_COMM_PAGE_START_ADDRESS+0x4e0) // sys_dcache_flush() -#define _COMM_PAGE_FLUSH_ICACHE (_COMM_PAGE_START_ADDRESS+0x520) // sys_icache_invalidate() -#define _COMM_PAGE_PTHREAD_SELF (_COMM_PAGE_START_ADDRESS+0x580) // pthread_self() -#define _COMM_PAGE_UNUSED4 (_COMM_PAGE_START_ADDRESS+0x5a0) // 32 unused bytes -#define _COMM_PAGE_RELINQUISH (_COMM_PAGE_START_ADDRESS+0x5c0) // used by spinlocks - -#define _COMM_PAGE_BTS (_COMM_PAGE_START_ADDRESS+0x5e0) // bit test-and-set -#define _COMM_PAGE_BTC (_COMM_PAGE_START_ADDRESS+0x5f0) // bit test-and-clear - -#define _COMM_PAGE_BZERO (_COMM_PAGE_START_ADDRESS+0x600) // bzero() -#define _COMM_PAGE_BCOPY (_COMM_PAGE_START_ADDRESS+0x780) // bcopy() -#define _COMM_PAGE_MEMCPY (_COMM_PAGE_START_ADDRESS+0x7a0) // memcpy() -#define _COMM_PAGE_MEMMOVE (_COMM_PAGE_START_ADDRESS+0x7a0) // memmove() +#define _COMM_PAGE_BZERO (_COMM_PAGE_START_ADDRESS+0x600) /* bzero() */ +#define _COMM_PAGE_BCOPY (_COMM_PAGE_START_ADDRESS+0x780) /* bcopy() */ +#define _COMM_PAGE_MEMCPY (_COMM_PAGE_START_ADDRESS+0x7a0) /* memcpy() */ +#define _COMM_PAGE_MEMMOVE (_COMM_PAGE_START_ADDRESS+0x7a0) /* memmove() */ + +#define _COMM_PAGE_OLD_NANOTIME (_COMM_PAGE_START_ADDRESS+0xf80) /* old nanotime location (deprecated) */ +#define _COMM_PAGE_MEMSET_PATTERN (_COMM_PAGE_START_ADDRESS+0x1000) /* used by nonzero memset() */ +#define _COMM_PAGE_LONGCOPY (_COMM_PAGE_START_ADDRESS+0x1200) /* used by bcopy() for very long operands */ + +#define _COMM_PAGE_SYSTEM_INTEGRITY (_COMM_PAGE_START_ADDRESS+0x1600) /* system integrity data, 256 bytes */ -#define _COMM_PAGE_NANOTIME (_COMM_PAGE_START_ADDRESS+0xF80) // 
nanotime() +#define _COMM_PAGE_NANOTIME (_COMM_PAGE_START_ADDRESS+0x1700) /* nanotime() */ -#define _COMM_PAGE_BIGCOPY (_COMM_PAGE_START_ADDRESS+0x1000)// very-long-operand copies +#define _COMM_PAGE_END (_COMM_PAGE_START_ADDRESS+0x1780) /* end of common page - insert new stuff here */ -#define _COMM_PAGE_END (_COMM_PAGE_START_ADDRESS+0x1600)// end of common page +/* _COMM_PAGE_COMPARE_AND_SWAP{32,64}B are not used on x86 and are + * maintained here for source compatibility. These will be removed at + * some point, so don't go relying on them. */ +#define _COMM_PAGE_COMPARE_AND_SWAP32B (_COMM_PAGE_START_ADDRESS+0xf80) /* compare-and-swap word w barrier */ +#define _COMM_PAGE_COMPARE_AND_SWAP64B (_COMM_PAGE_START_ADDRESS+0xfc0) /* compare-and-swap doubleword w barrier */ #ifdef __ASSEMBLER__ #ifdef __COMM_PAGE_SYMBOLS #define CREATE_COMM_PAGE_SYMBOL(symbol_name, symbol_address) \ - .org (symbol_address - (_COMM_PAGE_BASE_ADDRESS & 0xFFFFE000)) ;\ + .org (symbol_address - (_COMM_PAGE_START_ADDRESS & 0xFFFFE000)) ;\ symbol_name: nop - .text // Required to make a well behaved symbol file + .text /* Required to make a well behaved symbol file */ CREATE_COMM_PAGE_SYMBOL(___compare_and_swap32, _COMM_PAGE_COMPARE_AND_SWAP32) CREATE_COMM_PAGE_SYMBOL(___compare_and_swap64, _COMM_PAGE_COMPARE_AND_SWAP64) @@ -179,13 +224,15 @@ symbol_name: nop CREATE_COMM_PAGE_SYMBOL(___bzero, _COMM_PAGE_BZERO) CREATE_COMM_PAGE_SYMBOL(___bcopy, _COMM_PAGE_BCOPY) CREATE_COMM_PAGE_SYMBOL(___memcpy, _COMM_PAGE_MEMCPY) -// CREATE_COMM_PAGE_SYMBOL(___memmove, _COMM_PAGE_MEMMOVE) - CREATE_COMM_PAGE_SYMBOL(___bigcopy, _COMM_PAGE_BIGCOPY) +/* CREATE_COMM_PAGE_SYMBOL(___memmove, _COMM_PAGE_MEMMOVE) */ + CREATE_COMM_PAGE_SYMBOL(___old_nanotime, _COMM_PAGE_OLD_NANOTIME) + CREATE_COMM_PAGE_SYMBOL(___memset_pattern, _COMM_PAGE_MEMSET_PATTERN) + CREATE_COMM_PAGE_SYMBOL(___longcopy, _COMM_PAGE_LONGCOPY) CREATE_COMM_PAGE_SYMBOL(___nanotime, _COMM_PAGE_NANOTIME) CREATE_COMM_PAGE_SYMBOL(___end_comm_page, 
_COMM_PAGE_END) - .data // Required to make a well behaved symbol file - .long 0 // Required to make a well behaved symbol file + .data /* Required to make a well behaved symbol file */ + .long 0 /* Required to make a well behaved symbol file */ #endif /* __COMM_PAGE_SYMBOLS */ #endif /* __ASSEMBLER__ */ diff --git a/osfmk/i386/cpu_data.h b/osfmk/i386/cpu_data.h index 172738bc9..8ad34142a 100644 --- a/osfmk/i386/cpu_data.h +++ b/osfmk/i386/cpu_data.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -34,15 +34,18 @@ #include #include #include +#include #include - +#include +#include +#include /* * Data structures referenced (anonymously) from per-cpu data: */ struct cpu_core; struct cpu_cons_buffer; -struct mp_desc_table; +struct cpu_desc_table; /* @@ -55,12 +58,10 @@ typedef struct rtclock_timer { } rtclock_timer_t; typedef struct { - uint64_t rnt_tsc; /* timestamp */ - uint64_t rnt_nanos; /* nanoseconds */ - uint32_t rnt_scale; /* tsc -> nanosec multiplier */ - uint32_t rnt_shift; /* tsc -> nanosec shift/div */ - uint64_t rnt_step_tsc; /* tsc when scale applied */ - uint64_t rnt_step_nanos; /* ns when scale applied */ + uint64_t tsc_base; /* timestamp */ + uint64_t ns_base; /* nanoseconds */ + uint32_t scale; /* tsc -> nanosec multiplier */ + uint32_t shift; /* tsc -> nanosec shift/div */ } rtc_nanotime_t; typedef struct { @@ -71,8 +72,26 @@ typedef struct { struct fake_descriptor *cdi_gdt; struct fake_descriptor *cdi_idt; struct fake_descriptor *cdi_ldt; + vm_offset_t cdi_sstk; } cpu_desc_index_t; +typedef enum { + TASK_MAP_32BIT, /* 32-bit, compatibility mode */ + TASK_MAP_64BIT, /* 64-bit, separate address space */ + TASK_MAP_64BIT_SHARED /* 64-bit, kernel-shared addr space */ +} task_map_t; + +/* + * This structure is used on entry into the (uber-)kernel on syscall from + * a 64-bit user. 
It contains the address of the machine state save area + * for the current thread and a temporary place to save the user's rsp + * before loading this address into rsp. + */ +typedef struct { + addr64_t cu_isf; /* thread->pcb->iss.isf */ + uint64_t cu_tmp; /* temporary scratch */ + addr64_t cu_user_gs_base; +} cpu_uber_t; /* * Per-cpu data. @@ -91,9 +110,9 @@ typedef struct cpu_data { struct cpu_data *cpu_this; /* pointer to myself */ thread_t cpu_active_thread; - thread_t cpu_active_kloaded; - vm_offset_t cpu_active_stack; - vm_offset_t cpu_kernel_stack; + void *cpu_int_state; /* interrupt state */ + vm_offset_t cpu_active_stack; /* kernel stack base */ + vm_offset_t cpu_kernel_stack; /* kernel stack top */ vm_offset_t cpu_int_stack_top; int cpu_preemption_level; int cpu_simple_lock_count; @@ -108,27 +127,53 @@ typedef struct cpu_data int cpu_subtype; int cpu_threadtype; int cpu_running; - struct cpu_core *cpu_core; /* cpu's parent core */ - uint64_t cpu_rtc_tick_deadline; - uint64_t cpu_rtc_intr_deadline; - rtclock_timer_t cpu_rtc_timer; - rtc_nanotime_t cpu_rtc_nanotime; + uint64_t rtclock_intr_deadline; + rtclock_timer_t rtclock_timer; + boolean_t cpu_is64bit; + task_map_t cpu_task_map; + addr64_t cpu_task_cr3; + addr64_t cpu_active_cr3; + addr64_t cpu_kernel_cr3; + cpu_uber_t cpu_uber; + void *cpu_chud; void *cpu_console_buf; + struct cpu_core *cpu_core; /* cpu's parent core */ struct processor *cpu_processor; struct cpu_pmap *cpu_pmap; - struct mp_desc_table *cpu_desc_tablep; + struct cpu_desc_table *cpu_desc_tablep; + struct fake_descriptor *cpu_ldtp; cpu_desc_index_t cpu_desc_index; - boolean_t cpu_iflag; + int cpu_ldt; #ifdef MACH_KDB /* XXX Untested: */ int cpu_db_pass_thru; - vm_offset_t cpu_db_stacks; - struct i386_saved_state *cpu_kdb_saved_state; - spl_t cpu_kdb_saved_ipl; + vm_offset_t cpu_db_stacks; + void *cpu_kdb_saved_state; + spl_t cpu_kdb_saved_ipl; int cpu_kdb_is_slave; int cpu_kdb_active; #endif /* MACH_KDB */ - int cpu_hibernate; + boolean_t 
cpu_iflag; + boolean_t cpu_boot_complete; + int cpu_hibernate; + pmsd pms; /* Power Management Stepper control */ + uint64_t rtcPop; /* when the etimer wants a timer pop */ + + vm_offset_t cpu_copywindow_base; + uint64_t *cpu_copywindow_pdp; + + vm_offset_t cpu_physwindow_base; + uint64_t *cpu_physwindow_ptep; + void *cpu_hi_iss; + boolean_t cpu_tlb_invalid; + + uint64_t *cpu_pmHpet; /* Address of the HPET for this processor */ + uint32_t cpu_pmHpetVec; /* Interrupt vector for HPET for this processor */ +/* Statistics */ + pmStats_t cpu_pmStats; /* Power management data */ + uint32_t cpu_hwIntCnt[256]; /* Interrupt counts */ + + uint64_t cpu_dr7; /* debug control register */ } cpu_data_t; extern cpu_data_t *cpu_data_ptr[]; @@ -156,6 +201,13 @@ get_active_thread(void) #define current_thread_fast() get_active_thread() #define current_thread() current_thread_fast() +static inline boolean_t +get_is64bit(void) +{ + CPU_DATA_GET(cpu_is64bit, boolean_t) +} +#define cpu_mode_is64bit() get_is64bit() + static inline int get_preemption_level(void) { diff --git a/osfmk/i386/cpu_threads.c b/osfmk/i386/cpu_threads.c index 52de5180b..0a4c3d5e2 100644 --- a/osfmk/i386/cpu_threads.c +++ b/osfmk/i386/cpu_threads.c @@ -25,6 +25,7 @@ #include #include #include +#include /* * Kernel parameter determining whether threads are halted unconditionally @@ -33,63 +34,81 @@ */ int idlehalt = 1; -void -cpu_thread_init(void) + +static boolean_t +cpu_is_hyperthreaded(void) { - int my_cpu = get_cpu_number(); - int my_core_base_cpu; - int ret; - cpu_core_t *my_core; + if (cpuid_features() & CPUID_FEATURE_HTT) + return (cpuid_info()->cpuid_logical_per_package / + cpuid_info()->cpuid_cores_per_package) > 1; + else + return FALSE; +} - /* Have we initialized already for this cpu? */ - if (cpu_core()) - return; +void * +cpu_thread_alloc(int cpu) +{ + int core_base_cpu; + int ret; + cpu_core_t *core; - if (cpuid_features() & CPUID_FEATURE_HTT) { + /* + * Assume that all cpus have the same features. 
+ */ + if (cpu_is_hyperthreaded()) { /* * Get the cpu number of the base thread in the core. */ - my_core_base_cpu = cpu_to_core_cpu(my_cpu); - current_cpu_datap()->cpu_threadtype = CPU_THREADTYPE_INTEL_HTT; + core_base_cpu = cpu_to_core_cpu(cpu); + cpu_datap(cpu)->cpu_threadtype = CPU_THREADTYPE_INTEL_HTT; } else { - my_core_base_cpu = my_cpu; - current_cpu_datap()->cpu_threadtype = CPU_THREADTYPE_NONE; + core_base_cpu = cpu; + cpu_datap(cpu)->cpu_threadtype = CPU_THREADTYPE_NONE; } - /* - * Allocate the base cpu_core struct if none exists. - * Since we could be racing with other threads in the same core, - * this needs care without using locks. We allocate a new core - * structure and assign it atomically, freeing it if we lost the race. - */ - my_core = (cpu_core_t *) cpu_to_core(my_core_base_cpu); - if (my_core == NULL) { - cpu_core_t *new_core; - + core = (cpu_core_t *) cpu_to_core(core_base_cpu); + if (core == NULL) { ret = kmem_alloc(kernel_map, - (void *) &new_core, sizeof(cpu_core_t)); + (void *) &core, sizeof(cpu_core_t)); if (ret != KERN_SUCCESS) - panic("cpu_thread_init() kmem_alloc ret=%d\n", ret); - bzero((void *) new_core, sizeof(cpu_core_t)); - new_core->base_cpu = my_core_base_cpu; - if (atomic_cmpxchg((uint32_t *) &cpu_to_core(my_core_base_cpu), - 0, (uint32_t) new_core)) { - atomic_incl((long *) &machine_info.physical_cpu, 1); - atomic_incl((long *) &machine_info.physical_cpu_max, 1); - } else { - kmem_free(kernel_map, - (vm_offset_t)new_core, sizeof(cpu_core_t)); - } - my_core = (cpu_core_t *) cpu_to_core(my_core_base_cpu); + panic("cpu_thread_alloc() kmem_alloc ret=%d\n", ret); + bzero((void *) core, sizeof(cpu_core_t)); + + core->base_cpu = core_base_cpu; + + atomic_incl((long *) &machine_info.physical_cpu_max, 1); + + /* Allocate performance counter data area (if available) */ + core->pmc = pmc_alloc(); } + atomic_incl((long *) &machine_info.logical_cpu_max, 1); - cpu_to_core(my_cpu) = (struct cpu_core *) my_core; + return (void *) core; +} + 
+void +cpu_thread_init(void) +{ + int my_cpu = get_cpu_number(); + cpu_core_t *my_core; + + /* + * If we're the boot processor we allocate the core structure here. + * Otherwise the core has already been allocated (by the boot cpu). + */ + if (my_cpu == master_cpu) + cpu_to_core(master_cpu) = cpu_thread_alloc(master_cpu); + + my_core = cpu_core(); + if (my_core == NULL) + panic("cpu_thread_init() no core allocated for cpu %d", my_cpu); atomic_incl((long *) &my_core->active_threads, 1); - atomic_incl((long *) &my_core->num_threads, 1); atomic_incl((long *) &machine_info.logical_cpu, 1); - atomic_incl((long *) &machine_info.logical_cpu_max, 1); - + /* Note: cpus are started serially so this isn't as racey as it looks */ + if (my_core->num_threads == 0) + atomic_incl((long *) &machine_info.physical_cpu, 1); + atomic_incl((long *) &my_core->num_threads, 1); } /* @@ -101,10 +120,10 @@ cpu_thread_halt(void) { cpu_core_t *my_core = cpu_core(); - /* Note: don't ever decrement the number of physical processors */ - atomic_decl((long *) &my_core->active_threads, 1); - atomic_decl((long *) &my_core->num_threads, 1); atomic_decl((long *) &machine_info.logical_cpu, 1); + atomic_decl((long *) &my_core->active_threads, 1); + if (atomic_decl_and_test((long *) &my_core->num_threads, 1)) + atomic_decl((long *) &machine_info.physical_cpu, 1); cpu_halt(); } diff --git a/osfmk/i386/cpu_threads.h b/osfmk/i386/cpu_threads.h index ba98631be..5f7931cba 100644 --- a/osfmk/i386/cpu_threads.h +++ b/osfmk/i386/cpu_threads.h @@ -47,10 +47,10 @@ typedef struct { #define cpu_is_same_core(cpu1,cpu2) (cpu_to_core(cpu1) == cpu_to_core(cpu2)) +extern void *cpu_thread_alloc(int); extern void cpu_thread_init(void); extern void cpu_thread_halt(void); extern int idlehalt; -extern int ncore; #endif /* _I386_CPU_THREADS_H_ */ diff --git a/osfmk/i386/cpuid.c b/osfmk/i386/cpuid.c index e6dbc551c..86fa8959d 100644 --- a/osfmk/i386/cpuid.c +++ b/osfmk/i386/cpuid.c @@ -22,12 +22,28 @@ /* * @OSF_COPYRIGHT@ */ - 
+#include +#include #include #include "cpuid.h" +#if MACH_KDB +#include +#include +#include +#include +#include +#include +#include +#include +#endif #define min(a,b) ((a) < (b) ? (a) : (b)) +#define quad(hi,lo) (((uint64_t)(hi)) << 32 | (lo)) + +#define bit(n) (1UL << (n)) +#define bitmask(h,l) ((bit(h)|(bit(h)-1)) & ~(bit(l)-1)) +#define bitfield(x,h,l) (((x) & bitmask(h,l)) >> l) /* * CPU identification routines. @@ -72,10 +88,10 @@ cpuid_get_info(i386_cpu_info_t *info_p) /* do cpuid 0 to get vendor */ do_cpuid(0, cpuid_result); - cpuid_maxcpuid = cpuid_result[0]; - bcopy((char *)&cpuid_result[1], &info_p->cpuid_vendor[0], 4); /* ugh */ - bcopy((char *)&cpuid_result[2], &info_p->cpuid_vendor[8], 4); - bcopy((char *)&cpuid_result[3], &info_p->cpuid_vendor[4], 4); + cpuid_maxcpuid = cpuid_result[eax]; + bcopy((char *)&cpuid_result[ebx], &info_p->cpuid_vendor[0], 4); /* ug */ + bcopy((char *)&cpuid_result[ecx], &info_p->cpuid_vendor[8], 4); + bcopy((char *)&cpuid_result[edx], &info_p->cpuid_vendor[4], 4); info_p->cpuid_vendor[12] = 0; /* look up vendor */ @@ -155,6 +171,8 @@ CACHE_DESC(CPUID_CACHE_UCACHE_1M, L2U, 1*1024*1024, 32, \ "Unified L2 cache, 1M, 4-way set associative, 32byte line size"), CACHE_DESC(CPUID_CACHE_UCACHE_2M, L2U, 2*1024*1024, 32, \ "Unified L2 cache, 2M, 4-way set associative, 32byte line size"), +CACHE_DESC(CPUID_CACHE_UCACHE_4M, L2U, 4*1024*1024, 64, \ + "Unified L2 cache, 4M, 16-way set associative, 64byte line size"), CACHE_DESC(CPUID_CACHE_UCACHE_128K_64, L2U, 128*1024, 64, \ "Unified L2 cache, 128K, 8-way set associative, 64byte line size"), CACHE_DESC(CPUID_CACHE_UCACHE_256K_64, L2U, 256*1024, 64, \ @@ -205,9 +223,12 @@ CACHE_DESC(CPUID_CACHE_NULL, Lnone, 0, 0, \ (char *)0), }; -static const char * get_intel_model_string( i386_cpu_info_t * info_p ) +static const char * get_intel_model_string( i386_cpu_info_t * info_p, cpu_type_t* type, cpu_subtype_t* subtype) { - /* check for brand id */ + *type = CPU_TYPE_X86; + *subtype = 
CPU_SUBTYPE_X86_ARCH1; + + /* check for brand id string */ switch(info_p->cpuid_brand) { case CPUID_BRAND_UNSUPPORTED: /* brand ID not supported; use alternate method. */ @@ -236,14 +257,13 @@ static const char * get_intel_model_string( i386_cpu_info_t * info_p ) default: return "Unknown Intel P6 Family"; } - case CPUID_FAMILY_ITANIUM: - return "Intel Itanium"; case CPUID_FAMILY_EXTENDED: switch (info_p->cpuid_extfamily) { case CPUID_EXTFAMILY_PENTIUM4: + *subtype = CPU_SUBTYPE_PENTIUM_4; return "Intel Pentium 4"; - case CPUID_EXTFAMILY_ITANIUM2: - return "Intel Itanium 2"; + default: + return "Unknown Intel Extended Family"; } default: return "Unknown Intel Family"; @@ -257,10 +277,11 @@ static const char * get_intel_model_string( i386_cpu_info_t * info_p ) case CPUID_BRAND_PENTIUM_III_4: return "Pentium III"; case CPUID_BRAND_PIII_XEON: - if (info_p->cpuid_signature == 0x6B1) - return "Intel Celeron"; - else - return "Intel Pentium III Xeon"; + if (info_p->cpuid_signature == 0x6B1) { + return "Intel Celeron"; + } else { + return "Intel Pentium III Xeon"; + } case CPUID_BRAND_PENTIUM_III_M: return "Mobile Intel Pentium III-M"; case CPUID_BRAND_M_CELERON_7: @@ -270,16 +291,19 @@ static const char * get_intel_model_string( i386_cpu_info_t * info_p ) return "Mobile Intel Celeron"; case CPUID_BRAND_PENTIUM4_8: case CPUID_BRAND_PENTIUM4_9: + *subtype = CPU_SUBTYPE_PENTIUM_4; return "Intel Pentium 4"; case CPUID_BRAND_XEON: return "Intel Xeon"; case CPUID_BRAND_XEON_MP: return "Intel Xeon MP"; case CPUID_BRAND_PENTIUM4_M: - if (info_p->cpuid_signature == 0xF13) - return "Intel Xeon"; - else - return "Mobile Intel Pentium 4"; + if (info_p->cpuid_signature == 0xF13) { + return "Intel Xeon"; + } else { + *subtype = CPU_SUBTYPE_PENTIUM_4; + return "Mobile Intel Pentium 4"; + } case CPUID_BRAND_CELERON_M: return "Intel Celeron M"; case CPUID_BRAND_PENTIUM_M: @@ -288,7 +312,6 @@ static const char * get_intel_model_string( i386_cpu_info_t * info_p ) case CPUID_BRAND_MOBILE_17: 
return "Mobile Intel"; } - return "Unknown Intel"; } @@ -347,17 +370,69 @@ static void set_intel_cache_info( i386_cpu_info_t * info_p ) /* If we have no L2 cache, use the L1 data cache line size */ if (info_p->cache_size[L2U] == 0) info_p->cache_linesize = l1d_cache_linesize; + + /* + * Get cache sharing info if available. + */ + do_cpuid(0, cpuid_result); + if (cpuid_result[eax] >= 4) { + uint32_t reg[4]; + uint32_t index; + for (index = 0;; index++) { + /* + * Scan making calls for cpuid with %eax = 4 + * to get info about successive cache levels + * until a null type is returned. + */ + cache_type_t type = Lnone; + uint32_t cache_type; + uint32_t cache_level; + uint32_t cache_sharing; + + reg[eax] = 4; /* cpuid request 4 */ + reg[ecx] = index; /* index starting at 0 */ + cpuid(reg); +//kprintf("cpuid(4) index=%d eax=%p\n", index, reg[eax]); + cache_type = bitfield(reg[eax], 4, 0); + if (cache_type == 0) + break; /* done with cache info */ + cache_level = bitfield(reg[eax], 7, 5); + cache_sharing = bitfield(reg[eax], 25, 14); + info_p->cpuid_cores_per_package = + bitfield(reg[eax], 31, 26) + 1; + switch (cache_level) { + case 1: + type = cache_type == 1 ? L1D : + cache_type == 2 ? L1I : + Lnone; + break; + case 2: + type = cache_type == 3 ? L2U : + Lnone; + break; + case 3: + type = cache_type == 3 ? 
L3U : + Lnone; + } + if (type != Lnone) + info_p->cache_sharing[type] = cache_sharing + 1; + } + } } static void set_cpu_intel( i386_cpu_info_t * info_p ) { set_cpu_generic(info_p); set_intel_cache_info(info_p); - info_p->cpuid_model_string = get_intel_model_string(info_p); + info_p->cpuid_model_string = get_intel_model_string(info_p, &info_p->cpuid_cpu_type, &info_p->cpuid_cpu_subtype); } -static const char * get_amd_model_string( i386_cpu_info_t * info_p ) +static const char * get_amd_model_string( i386_cpu_info_t * info_p, cpu_type_t* type, cpu_subtype_t* subtype ) { + *type = CPU_TYPE_X86; + *subtype = CPU_SUBTYPE_X86_ARCH1; + + /* check for brand id string */ switch (info_p->cpuid_family) { case CPUID_FAMILY_486: @@ -438,11 +513,11 @@ static void set_amd_cache_info( i386_cpu_info_t * info_p ) /* (ignore) */ /* ECX: L1 Data Cache Information */ - info_p->cache_size[L1D] = ((cpuid_result[2] >> 24) & 0xFF) * 1024; - info_p->cache_linesize = (cpuid_result[2] & 0xFF); + info_p->cache_size[L1D] = ((cpuid_result[ecx] >> 24) & 0xFF) * 1024; + info_p->cache_linesize = (cpuid_result[ecx] & 0xFF); /* EDX: L1 Instruction Cache Information */ - info_p->cache_size[L1I] = ((cpuid_result[3] >> 24) & 0xFF) * 1024; + info_p->cache_size[L1I] = ((cpuid_result[edx] >> 24) & 0xFF) * 1024; /* L2 Cache Information */ do_cpuid(0x80000006, cpuid_result); @@ -454,16 +529,16 @@ static void set_amd_cache_info( i386_cpu_info_t * info_p ) /* (ignore) */ /* ECX: L2 Cache Information */ - info_p->cache_size[L2U] = ((cpuid_result[2] >> 16) & 0xFFFF) * 1024; + info_p->cache_size[L2U] = ((cpuid_result[ecx] >> 16) & 0xFFFF) * 1024; if (info_p->cache_size[L2U] > 0) - info_p->cache_linesize = cpuid_result[2] & 0xFF; + info_p->cache_linesize = cpuid_result[ecx] & 0xFF; } static void set_cpu_amd( i386_cpu_info_t * info_p ) { set_cpu_generic(info_p); set_amd_cache_info(info_p); - info_p->cpuid_model_string = get_amd_model_string(info_p); + info_p->cpuid_model_string = get_amd_model_string(info_p, 
&info_p->cpuid_cpu_type, &info_p->cpuid_cpu_subtype); } static void set_cpu_nsc( i386_cpu_info_t * info_p ) @@ -471,12 +546,16 @@ static void set_cpu_nsc( i386_cpu_info_t * info_p ) set_cpu_generic(info_p); set_amd_cache_info(info_p); - if (info_p->cpuid_family == CPUID_FAMILY_586 && info_p->cpuid_model == CPUID_MODEL_GX1) + /* check for brand id string */ + if (info_p->cpuid_family == CPUID_FAMILY_586 && info_p->cpuid_model == CPUID_MODEL_GX1) { info_p->cpuid_model_string = "AMD Geode GX1"; - else if (info_p->cpuid_family == CPUID_FAMILY_586 && info_p->cpuid_model == CPUID_MODEL_GX2) + } else if (info_p->cpuid_family == CPUID_FAMILY_586 && info_p->cpuid_model == CPUID_MODEL_GX2) { info_p->cpuid_model_string = "AMD Geode GX"; - else + } else { info_p->cpuid_model_string = "Unknown National Semiconductor"; + } + info_p->cpuid_cpu_type = CPU_TYPE_X86; + info_p->cpuid_cpu_subtype = CPU_SUBTYPE_X86_ARCH1; } static void @@ -488,7 +567,7 @@ set_cpu_generic(i386_cpu_info_t *info_p) /* get extended cpuid results */ do_cpuid(0x80000000, cpuid_result); - max_extid = cpuid_result[0]; + max_extid = cpuid_result[eax]; /* check to see if we can get brand string */ if (max_extid >= 0x80000004) { @@ -520,15 +599,23 @@ set_cpu_generic(i386_cpu_info_t *info_p) /* get processor signature and decode */ do_cpuid(1, cpuid_result); - info_p->cpuid_signature = cpuid_result[0]; - info_p->cpuid_stepping = cpuid_result[0] & 0x0f; - info_p->cpuid_model = (cpuid_result[0] >> 4) & 0x0f; - info_p->cpuid_family = (cpuid_result[0] >> 8) & 0x0f; - info_p->cpuid_type = (cpuid_result[0] >> 12) & 0x03; - info_p->cpuid_extmodel = (cpuid_result[0] >> 16) & 0x0f; - info_p->cpuid_extfamily = (cpuid_result[0] >> 20) & 0xff; - info_p->cpuid_brand = cpuid_result[1] & 0xff; - info_p->cpuid_features = cpuid_result[3]; + info_p->cpuid_signature = cpuid_result[eax]; + info_p->cpuid_stepping = bitfield(cpuid_result[eax], 3, 0); + info_p->cpuid_model = bitfield(cpuid_result[eax], 7, 4); + info_p->cpuid_family = 
bitfield(cpuid_result[eax], 11, 8); + info_p->cpuid_type = bitfield(cpuid_result[eax], 13, 12); + info_p->cpuid_extmodel = bitfield(cpuid_result[eax], 19, 16); + info_p->cpuid_extfamily = bitfield(cpuid_result[eax], 27, 20); + info_p->cpuid_brand = bitfield(cpuid_result[ebx], 7, 0); + info_p->cpuid_logical_per_package = + bitfield(cpuid_result[ebx], 23, 16); + info_p->cpuid_features = quad(cpuid_result[ecx], cpuid_result[edx]); + + if (max_extid >= 0x80000001) { + do_cpuid(0x80000001, cpuid_result); + info_p->cpuid_extfeatures = + quad(cpuid_result[ecx], cpuid_result[edx]); + } return; } @@ -541,9 +628,9 @@ set_cpu_unknown(__unused i386_cpu_info_t *info_p) static struct { - uint32_t mask; + uint64_t mask; const char *name; -} feature_names[] = { +} feature_map[] = { {CPUID_FEATURE_FPU, "FPU",}, {CPUID_FEATURE_VME, "VME",}, {CPUID_FEATURE_DE, "DE",}, @@ -572,25 +659,65 @@ static struct { {CPUID_FEATURE_SS, "SS",}, {CPUID_FEATURE_HTT, "HTT",}, {CPUID_FEATURE_TM, "TM",}, + {CPUID_FEATURE_SSE3, "SSE3"}, + {CPUID_FEATURE_MONITOR, "MON"}, + {CPUID_FEATURE_DSCPL, "DSCPL"}, + {CPUID_FEATURE_VMX, "VMX"}, + {CPUID_FEATURE_SMX, "SMX"}, + {CPUID_FEATURE_EST, "EST"}, + {CPUID_FEATURE_TM2, "TM2"}, + {CPUID_FEATURE_MNI, "MNI"}, + {CPUID_FEATURE_CID, "CID"}, + {CPUID_FEATURE_CX16, "CX16"}, + {CPUID_FEATURE_xTPR, "TPR"}, + {0, 0} +}, +extfeature_map[] = { + {CPUID_EXTFEATURE_SYSCALL, "SYSCALL"}, + {CPUID_EXTFEATURE_XD, "XD"}, + {CPUID_EXTFEATURE_EM64T, "EM64T"}, + {CPUID_EXTFEATURE_LAHF, "LAHF"}, {0, 0} }; char * -cpuid_get_feature_names(uint32_t feature, char *buf, unsigned buf_len) +cpuid_get_feature_names(uint64_t features, char *buf, unsigned buf_len) { + int len = -1; + char *p = buf; int i; - int len; + + for (i = 0; feature_map[i].mask != 0; i++) { + if ((features & feature_map[i].mask) == 0) + continue; + if (len > 0) + *p++ = ' '; + len = min(strlen(feature_map[i].name), (buf_len-1) - (p-buf)); + if (len == 0) + break; + bcopy(feature_map[i].name, p, len); + p += len; + } 
+ *p = '\0'; + return buf; +} + +char * +cpuid_get_extfeature_names(uint64_t extfeatures, char *buf, unsigned buf_len) +{ + int len = -1; char *p = buf; + int i; - for (i = 0; feature_names[i].mask != 0; i++) { - if ((feature & feature_names[i].mask) == 0) + for (i = 0; extfeature_map[i].mask != 0; i++) { + if ((extfeatures & extfeature_map[i].mask) == 0) continue; - if (i > 0) + if (len > 0) *p++ = ' '; - len = min(strlen(feature_names[i].name), (buf_len-1) - (p-buf)); + len = min(strlen(extfeature_map[i].name), (buf_len-1)-(p-buf)); if (len == 0) break; - bcopy(feature_names[i].name, p, len); + bcopy(extfeature_map[i].name, p, len); p += len; } *p = '\0'; @@ -599,23 +726,41 @@ cpuid_get_feature_names(uint32_t feature, char *buf, unsigned buf_len) void cpuid_feature_display( - const char *header, - __unused int my_cpu) + const char *header) +{ + char buf[256]; + + kprintf("%s: %s\n", header, + cpuid_get_feature_names(cpuid_features(), + buf, sizeof(buf))); + if (cpuid_features() & CPUID_FEATURE_HTT) { +#define s_if_plural(n) ((n > 1) ? 
"s" : "") + kprintf(" HTT: %d core%s per package;" + " %d logical cpu%s per package\n", + cpuid_cpu_info.cpuid_cores_per_package, + s_if_plural(cpuid_cpu_info.cpuid_cores_per_package), + cpuid_cpu_info.cpuid_logical_per_package, + s_if_plural(cpuid_cpu_info.cpuid_logical_per_package)); + } +} + +void +cpuid_extfeature_display( + const char *header) { char buf[256]; - printf("%s: %s\n", header, - cpuid_get_feature_names(cpuid_features(), buf, sizeof(buf))); + kprintf("%s: %s\n", header, + cpuid_get_extfeature_names(cpuid_extfeatures(), + buf, sizeof(buf))); } void cpuid_cpu_display( - const char *header, - __unused int my_cpu) + const char *header) { if (cpuid_cpu_info.cpuid_brand_string[0] != '\0') { - printf("%s: %s\n", header, - cpuid_cpu_info.cpuid_brand_string); + kprintf("%s: %s\n", header, cpuid_cpu_info.cpuid_brand_string); } } @@ -625,7 +770,19 @@ cpuid_family(void) return cpuid_cpu_info.cpuid_family; } -unsigned int +cpu_type_t +cpuid_cputype(void) +{ + return cpuid_cpu_info.cpuid_cpu_type; +} + +cpu_subtype_t +cpuid_cpusubtype(void) +{ + return cpuid_cpu_info.cpuid_cpu_subtype; +} + +uint64_t cpuid_features(void) { static int checked = 0; @@ -647,17 +804,65 @@ cpuid_features(void) return cpuid_cpu_info.cpuid_features; } +uint64_t +cpuid_extfeatures(void) +{ + return cpuid_cpu_info.cpuid_extfeatures; +} + i386_cpu_info_t * cpuid_info(void) { return &cpuid_cpu_info; } -/* XXX for temporary compatibility */ void -set_cpu_model(void) +cpuid_set_info(void) { cpuid_get_info(&cpuid_cpu_info); - cpuid_feature = cpuid_cpu_info.cpuid_features; /* XXX compat */ } +#if MACH_KDB + +/* + * Display the cpuid + * * + * cp + */ +void +db_cpuid(__unused db_expr_t addr, + __unused int have_addr, + __unused db_expr_t count, + __unused char *modif) +{ + + uint32_t i, mid; + uint32_t cpid[4]; + + do_cpuid(0, cpid); /* Get the first cpuid which is the number of + * basic ids */ + db_printf("%08X - %08X %08X %08X %08X\n", + 0, cpid[eax], cpid[ebx], cpid[ecx], cpid[edx]); + + mid 
= cpid[eax]; /* Set the number */ + for (i = 1; i <= mid; i++) { /* Dump 'em out */ + do_cpuid(i, cpid); /* Get the next */ + db_printf("%08X - %08X %08X %08X %08X\n", + i, cpid[eax], cpid[ebx], cpid[ecx], cpid[edx]); + } + db_printf("\n"); + + do_cpuid(0x80000000, cpid); /* Get the first extended cpuid which + * is the number of extended ids */ + db_printf("%08X - %08X %08X %08X %08X\n", + 0x80000000, cpid[eax], cpid[ebx], cpid[ecx], cpid[edx]); + + mid = cpid[eax]; /* Set the number */ + for (i = 0x80000001; i <= mid; i++) { /* Dump 'em out */ + do_cpuid(i, cpid); /* Get the next */ + db_printf("%08X - %08X %08X %08X %08X\n", + i, cpid[eax], cpid[ebx], cpid[ecx], cpid[edx]); + } +} + +#endif diff --git a/osfmk/i386/cpuid.h b/osfmk/i386/cpuid.h index cf3512b0a..263e3f8a1 100644 --- a/osfmk/i386/cpuid.h +++ b/osfmk/i386/cpuid.h @@ -49,34 +49,66 @@ #define CPUID_STRING_UNKNOWN "Unknown CPU Typ" -#define CPUID_FEATURE_FPU 0x00000001 /* Floating point unit on-chip */ -#define CPUID_FEATURE_VME 0x00000002 /* Virtual Mode Extension */ -#define CPUID_FEATURE_DE 0x00000004 /* Debugging Extension */ -#define CPUID_FEATURE_PSE 0x00000008 /* Page Size Extension */ -#define CPUID_FEATURE_TSC 0x00000010 /* Time Stamp Counter */ -#define CPUID_FEATURE_MSR 0x00000020 /* Model Specific Registers */ -#define CPUID_FEATURE_PAE 0x00000040 /* Physical Address Extension */ -#define CPUID_FEATURE_MCE 0x00000080 /* Machine Check Exception */ -#define CPUID_FEATURE_CX8 0x00000100 /* CMPXCHG8B */ -#define CPUID_FEATURE_APIC 0x00000200 /* On-chip APIC */ -#define CPUID_FEATURE_SEP 0x00000800 /* Fast System Call */ -#define CPUID_FEATURE_MTRR 0x00001000 /* Memory Type Range Register */ -#define CPUID_FEATURE_PGE 0x00002000 /* Page Global Enable */ -#define CPUID_FEATURE_MCA 0x00004000 /* Machine Check Architecture */ -#define CPUID_FEATURE_CMOV 0x00008000 /* Conditional Move Instruction */ -#define CPUID_FEATURE_PAT 0x00010000 /* Page Attribute Table */ -#define CPUID_FEATURE_PSE36 
0x00020000 /* 36-bit Page Size Extension */ -#define CPUID_FEATURE_PSN 0x00040000 /* Processor Serial Number */ -#define CPUID_FEATURE_CLFSH 0x00080000 /* CLFLUSH Instruction supported */ -#define CPUID_FEATURE_DS 0x00200000 /* Debug Store */ -#define CPUID_FEATURE_ACPI 0x00400000 /* Thermal Monitor, SW-controlled clock */ -#define CPUID_FEATURE_MMX 0x00800000 /* MMX supported */ -#define CPUID_FEATURE_FXSR 0x01000000 /* Fast floating point save/restore */ -#define CPUID_FEATURE_SSE 0x02000000 /* Streaming SIMD extensions */ -#define CPUID_FEATURE_SSE2 0x04000000 /* Streaming SIMD extensions 2 */ -#define CPUID_FEATURE_SS 0x08000000 /* Self-Snoop */ -#define CPUID_FEATURE_HTT 0x10000000 /* Hyper-Threading Technology */ -#define CPUID_FEATURE_TM 0x20000000 /* Thermal Monitor */ +#define _Bit(n) (1ULL << n) +#define _HBit(n) (1ULL << ((n)+32)) + +/* + * The CPUID_FEATURE_XXX values define 64-bit values + * returned in %ecx:%edx to a CPUID request with %eax of 1: + */ +#define CPUID_FEATURE_FPU _Bit(0) /* Floating point unit on-chip */ +#define CPUID_FEATURE_VME _Bit(1) /* Virtual Mode Extension */ +#define CPUID_FEATURE_DE _Bit(2) /* Debugging Extension */ +#define CPUID_FEATURE_PSE _Bit(3) /* Page Size Extension */ +#define CPUID_FEATURE_TSC _Bit(4) /* Time Stamp Counter */ +#define CPUID_FEATURE_MSR _Bit(5) /* Model Specific Registers */ +#define CPUID_FEATURE_PAE _Bit(6) /* Physical Address Extension */ +#define CPUID_FEATURE_MCE _Bit(7) /* Machine Check Exception */ +#define CPUID_FEATURE_CX8 _Bit(8) /* CMPXCHG8B */ +#define CPUID_FEATURE_APIC _Bit(9) /* On-chip APIC */ +#define CPUID_FEATURE_SEP _Bit(11) /* Fast System Call */ +#define CPUID_FEATURE_MTRR _Bit(12) /* Memory Type Range Register */ +#define CPUID_FEATURE_PGE _Bit(13) /* Page Global Enable */ +#define CPUID_FEATURE_MCA _Bit(14) /* Machine Check Architecture */ +#define CPUID_FEATURE_CMOV _Bit(15) /* Conditional Move Instruction */ +#define CPUID_FEATURE_PAT _Bit(16) /* Page Attribute Table */ 
+#define CPUID_FEATURE_PSE36 _Bit(17) /* 36-bit Page Size Extension */ +#define CPUID_FEATURE_PSN _Bit(18) /* Processor Serial Number */ +#define CPUID_FEATURE_CLFSH _Bit(19) /* CLFLUSH Instruction supported */ +#define CPUID_FEATURE_DS _Bit(21) /* Debug Store */ +#define CPUID_FEATURE_ACPI _Bit(22) /* Thermal monitor and Clock Ctrl */ +#define CPUID_FEATURE_MMX _Bit(23) /* MMX supported */ +#define CPUID_FEATURE_FXSR _Bit(24) /* Fast floating pt save/restore */ +#define CPUID_FEATURE_SSE _Bit(25) /* Streaming SIMD extensions */ +#define CPUID_FEATURE_SSE2 _Bit(26) /* Streaming SIMD extensions 2 */ +#define CPUID_FEATURE_SS _Bit(27) /* Self-Snoop */ +#define CPUID_FEATURE_HTT _Bit(28) /* Hyper-Threading Technology */ +#define CPUID_FEATURE_TM _Bit(29) /* Thermal Monitor (TM1) */ +#define CPUID_FEATURE_PBE _Bit(31) /* Pend Break Enable */ + +#define CPUID_FEATURE_SSE3 _HBit(0) /* Prescott New Inst. */ +#define CPUID_FEATURE_MONITOR _HBit(3) /* Monitor/mwait */ +#define CPUID_FEATURE_DSCPL _HBit(4) /* Debug Store CPL */ +#define CPUID_FEATURE_VMX _HBit(5) /* VMX */ +#define CPUID_FEATURE_SMX _HBit(6) /* SMX */ +#define CPUID_FEATURE_EST _HBit(7) /* Enhanced SpeedsTep (GV3) */ +#define CPUID_FEATURE_TM2 _HBit(8) /* Thermal Monitor 2 */ +#define CPUID_FEATURE_SSSE3 _HBit(9) /* Supplemental SSE3 instructions */ +#define CPUID_FEATURE_MNI CPUID_FEATURE_SSSE3 +#define CPUID_FEATURE_CID _HBit(10) /* L1 Context ID */ +#define CPUID_FEATURE_CX16 _HBit(13) /* CmpXchg16b instruction */ +#define CPUID_FEATURE_xTPR _HBit(14) /* Send Task PRiority msgs */ + +/* + * The CPUID_EXTFEATURE_XXX values define 64-bit values + * returned in %ecx:%edx to a CPUID request with %eax of 0x80000001: + */ +#define CPUID_EXTFEATURE_SYSCALL _Bit(11) /* SYSCALL/sysret */ +#define CPUID_EXTFEATURE_XD _Bit(20) /* eXecute Disable */ +#define CPUID_EXTFEATURE_EM64T _Bit(29) /* Extended Mem 64 Technology */ + +#define CPUID_EXTFEATURE_LAHF _HBit(20) /* LAHF/SAHF instructions */ + #define CPUID_TYPE_OEM
0x0 /* Original processor */ #define CPUID_TYPE_OVERDRIVE 0x1 /* Overdrive processor */ @@ -146,6 +178,8 @@ #define CPUID_MODEL_P6A 0xA /* Intel PIII Xeon model A */ #define CPUID_MODEL_P6B 0xB /* Intel PIII model B */ #define CPUID_MODEL_PMD 0xD /* Intel Pentium M model D */ +#define CPUID_MODEL_CORE 0xE /* Intel Core Solo & Duo */ +#define CPUID_MODEL_CORE2 0xF /* Intel Core2 Duo */ #define CPUID_MODEL_ATHLON_M1 0x1 /* AMD Athlon Model 1 */ #define CPUID_MODEL_ATHLON_M2 0x2 /* AMD Athlon Model 2 */ @@ -237,6 +271,7 @@ #define CPUID_CACHE_UCACHE_512K 0x43 /* 2nd-level cache, 512K */ #define CPUID_CACHE_UCACHE_1M 0x44 /* 2nd-level cache, 1M */ #define CPUID_CACHE_UCACHE_2M 0x45 /* 2nd-level cache, 2M */ +#define CPUID_CACHE_UCACHE_4M 0x49 /* 2nd-level cache, 4M */ #define CPUID_CACHE_ITLB_64 0x50 /* Instruction TLB, 64 entries */ #define CPUID_CACHE_ITLB_128 0x51 /* Instruction TLB, 128 entries */ #define CPUID_CACHE_ITLB_256 0x52 /* Instruction TLB, 256 entries */ @@ -275,6 +310,20 @@ #include +typedef enum { eax, ebx, ecx, edx } cpuid_register_t; +static inline void +cpuid(uint32_t *data) +{ + asm("cpuid" + : "=a" (data[eax]), + "=b" (data[ebx]), + "=c" (data[ecx]), + "=d" (data[edx]) + : "a" (data[eax]), + "b" (data[ebx]), + "c" (data[ecx]), + "d" (data[edx])); +} static inline void do_cpuid(uint32_t selector, uint32_t *data) { @@ -309,20 +358,20 @@ typedef struct { { value, type, size, linesize } #endif /* KERNEL */ -/* Physical CPU info */ +/* Physical CPU info - this is exported out of the kernel (kexts), so be wary of changes */ typedef struct { char cpuid_vendor[16]; char cpuid_brand_string[48]; const char *cpuid_model_string; - uint32_t cpuid_value; - cpu_type_t cpuid_type; + cpu_type_t cpuid_type; /* this is *not* a cpu_type_t in our */ uint8_t cpuid_family; uint8_t cpuid_model; uint8_t cpuid_extmodel; uint8_t cpuid_extfamily; uint8_t cpuid_stepping; - uint32_t cpuid_features; + uint64_t cpuid_features; + uint64_t cpuid_extfeatures; uint32_t 
cpuid_signature; uint8_t cpuid_brand; @@ -331,6 +380,12 @@ typedef struct { uint8_t cache_info[64]; /* list of cache descriptors */ + uint32_t cpuid_cores_per_package; + uint32_t cpuid_logical_per_package; + uint32_t cache_sharing[LCACHE_MAX]; + + cpu_type_t cpuid_cpu_type; /* */ + cpu_subtype_t cpuid_cpu_subtype; /* */ } i386_cpu_info_t; #ifdef __cplusplus @@ -340,20 +395,22 @@ extern "C" { /* * External declarations */ -extern cpu_type_t cpuid_cputype(int); -extern void cpuid_cpu_display(const char *, __unused int); -extern void cpuid_feature_display(const char *, __unused int); -extern char * cpuid_get_feature_names(uint32_t, char *, unsigned); - -extern uint32_t cpuid_features(void); +extern cpu_type_t cpuid_cputype(void); +extern cpu_subtype_t cpuid_cpusubtype(void); +extern void cpuid_cpu_display(const char *); +extern void cpuid_feature_display(const char *); +extern void cpuid_extfeature_display(const char *); +extern char * cpuid_get_feature_names(uint64_t, char *, unsigned); +extern char * cpuid_get_extfeature_names(uint64_t, char *, unsigned); + +extern uint64_t cpuid_features(void); +extern uint64_t cpuid_extfeatures(void); extern uint32_t cpuid_family(void); extern void cpuid_get_info(i386_cpu_info_t *info_p); extern i386_cpu_info_t *cpuid_info(void); -/* XXX obsolescent: */ -extern uint32_t cpuid_feature; -extern void set_cpu_model(void); +extern void cpuid_set_info(void); #ifdef __cplusplus } diff --git a/osfmk/i386/cswitch.s b/osfmk/i386/cswitch.s index 107bc26af..31ca053b8 100644 --- a/osfmk/i386/cswitch.s +++ b/osfmk/i386/cswitch.s @@ -66,6 +66,7 @@ #define CX(addr, reg) addr(,reg,4) + .text /* * Context switch routines for i386. */ @@ -86,23 +87,27 @@ Entry(Load_context) call EXT(thread_continue) /* - * This really only has to save registers + * This has to save registers only * when there is no explicit continuation. 
*/ Entry(Switch_context) - movl %gs:CPU_ACTIVE_STACK,%ecx /* get old kernel stack */ + popl %eax /* pop return PC */ + /* Test for a continuation and skip all state saving if so... */ + cmpl $0,4(%esp) + jne 5f + movl %gs:CPU_ACTIVE_STACK,%ecx /* get old kernel stack */ movl %ebx,KSS_EBX(%ecx) /* save registers */ movl %ebp,KSS_EBP(%ecx) movl %edi,KSS_EDI(%ecx) movl %esi,KSS_ESI(%ecx) - popl KSS_EIP(%ecx) /* save return PC */ + movl %eax,KSS_EIP(%ecx) /* save return PC */ movl %esp,KSS_ESP(%ecx) /* save SP */ - +5: movl 0(%esp),%eax /* return old thread */ movl 8(%esp),%ebx /* get new thread */ - movl %ebx,%gs:CPU_ACTIVE_THREAD /* new thread is active */ + movl %ebx,%gs:CPU_ACTIVE_THREAD /* new thread is active */ movl TH_KERNEL_STACK(%ebx),%ecx /* get its kernel stack */ lea KERNEL_STACK_SIZE-IKS_SIZE-IEL_SIZE(%ecx),%ebx /* point to stack top */ @@ -110,9 +115,7 @@ Entry(Switch_context) movl %ecx,%gs:CPU_ACTIVE_STACK /* set current stack */ movl %ebx,%gs:CPU_KERNEL_STACK /* set stack top */ - - movl $0,%gs:CPU_ACTIVE_KLOADED - + movl KSS_ESP(%ecx),%esp /* switch stacks */ movl KSS_ESI(%ecx),%esi /* restore registers */ movl KSS_EDI(%ecx),%edi @@ -126,19 +129,17 @@ Entry(Thread_continue) call *%ebx /* call real continuation */ /* - * void machine_processor_shutdown(thread_t thread, - * void (*routine)(processor_t), - * processor_t processor) + * thread_t Shutdown_context(thread_t thread, + * void (*routine)(processor_t), + * processor_t processor) * * saves the kernel context of the thread, * switches to the interrupt stack, * continues the thread (with thread_continue), * then runs routine on the interrupt stack. 
* - * Assumes that the thread is a kernel thread (thus - * has no FPU state) */ -Entry(machine_processor_shutdown) +Entry(Shutdown_context) movl %gs:CPU_ACTIVE_STACK,%ecx /* get old kernel stack */ movl %ebx,KSS_EBX(%ecx) /* save registers */ movl %ebp,KSS_EBP(%ecx) @@ -157,10 +158,3 @@ Entry(machine_processor_shutdown) pushl %esi /* push argument */ call *%ebx /* call routine to run */ hlt /* (should never return) */ - - - .text - - .globl EXT(locore_end) -LEXT(locore_end) - diff --git a/osfmk/i386/db_interface.c b/osfmk/i386/db_interface.c index 4bb885b1b..55cf5aa62 100644 --- a/osfmk/i386/db_interface.c +++ b/osfmk/i386/db_interface.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -70,6 +70,8 @@ #include #include #include +#include +#include #include #include @@ -86,9 +88,11 @@ #include #include +#include + int db_active = 0; -struct i386_saved_state *i386_last_saved_statep; -struct i386_saved_state i386_nested_saved_state; +x86_saved_state32_t *i386_last_saved_statep; +x86_saved_state32_t i386_nested_saved_state; unsigned i386_last_kdb_sp; extern thread_t db_default_act; @@ -111,7 +115,7 @@ struct int_regs { int esi; int ebp; int ebx; - struct i386_interrupt_state *is; + x86_saved_state32_t *is; }; extern char * trap_type[]; @@ -160,10 +164,29 @@ extern jmp_buf_t *db_recover; * in a ktss, we hard-wire that in, rather than indexing the gdt * with tss_sel to derive a pointer to the desired tss. 
*/ + +/* + * Code used to synchronize kdb among all cpus, one active at a time, switch + * from one to another using cpu #cpu + */ + +decl_simple_lock_data(, kdb_lock) /* kdb lock */ + +#define db_simple_lock_init(l, e) hw_lock_init(&((l)->interlock)) +#define db_simple_lock_try(l) hw_lock_try(&((l)->interlock)) +#define db_simple_unlock(l) hw_lock_unlock(&((l)->interlock)) + +int kdb_cpu = -1; /* current cpu running kdb */ +int kdb_debug = 1; +volatile unsigned int cpus_holding_bkpts; /* counter for number of cpus + * holding breakpoints + */ +extern boolean_t db_breakpoints_inserted; + void db_tss_to_frame( int tss_sel, - struct i386_saved_state *regs) + x86_saved_state32_t *regs) { extern struct i386_tss ktss; int mycpu = cpu_number(); @@ -174,7 +197,7 @@ db_tss_to_frame( /* * ddb will overwrite whatever's in esp, so put esp0 elsewhere, too. */ - regs->esp = tss->esp0; + regs->cr2 = tss->esp0; regs->efl = tss->eflags; regs->eip = tss->eip; regs->trapno = tss->ss0; /* XXX */ @@ -201,7 +224,7 @@ db_tss_to_frame( */ boolean_t db_trap_from_asm( - struct i386_saved_state *regs) + x86_saved_state32_t *regs) { int code; int type; @@ -215,12 +238,18 @@ int kdb_trap( int type, int code, - struct i386_saved_state *regs) + x86_saved_state32_t *regs) { extern char etext; boolean_t trap_from_user; - spl_t s = splhigh(); + spl_t s; + int previous_console_device; + s = splhigh(); + + previous_console_device = switch_to_serial_console(); + + db_printf("kdb_trap(): type %d, code %d, regs->eip 0x%x\n", type, code, regs->eip); switch (type) { case T_DEBUG: /* single_step */ { @@ -274,8 +303,8 @@ kdb_trap( if (!IS_USER_TRAP(regs, &etext)) { bzero((char *)&ddb_regs, sizeof (ddb_regs)); - *(struct i386_saved_state_from_kernel *)&ddb_regs = - *(struct i386_saved_state_from_kernel *)regs; + *(struct x86_saved_state32_from_kernel *)&ddb_regs = + *(struct x86_saved_state32_from_kernel *)regs; trap_from_user = FALSE; } else { @@ -300,6 +329,7 @@ kdb_trap( regs->ecx = ddb_regs.ecx; 
regs->edx = ddb_regs.edx; regs->ebx = ddb_regs.ebx; + if (trap_from_user) { /* * user mode - saved esp and ss valid @@ -307,6 +337,7 @@ kdb_trap( regs->uesp = ddb_regs.uesp; /* user stack pointer */ regs->ss = ddb_regs.ss & 0xffff; /* user stack segment */ } + regs->ebp = ddb_regs.ebp; regs->esi = ddb_regs.esi; regs->edi = ddb_regs.edi; @@ -324,16 +355,13 @@ kdb_trap( trap_from_user)) == BKPT_INST)) regs->eip += BKPT_SIZE; - + + switch_to_old_console(previous_console_device); kdb_exit: kdb_leave(); current_cpu_datap()->cpu_kdb_saved_state = 0; -#if MACH_MP_DEBUG - current_cpu_datap()->cpu_masked_state_cnt = 0; -#endif /* MACH_MP_DEBUG */ - enable_preemption(); splx(s); @@ -363,8 +391,8 @@ kdb_kentry( { extern char etext; boolean_t trap_from_user; - struct i386_interrupt_state *is = int_regs->is; - struct i386_saved_state regs; + x86_saved_state32_t *is = int_regs->is; + x86_saved_state32_t regs; spl_t s; s = splhigh(); @@ -465,18 +493,28 @@ db_user_to_kernel_address( int flag) { register pt_entry_t *ptp; + vm_offset_t src; + + /* + * must not be pre-empted while using the pte pointer passed + * back since it's been mapped through a per-cpu window + */ + mp_disable_preemption(); - ptp = pmap_pte(task->map->pmap, addr); + ptp = pmap_pte(task->map->pmap, (vm_map_offset_t)addr); if (ptp == PT_ENTRY_NULL || (*ptp & INTEL_PTE_VALID) == 0) { if (flag) { db_printf("\nno memory is assigned to address %08x\n", addr); db_error(0); /* NOTREACHED */ } + mp_enable_preemption(); return(-1); } - src = (vm_offset_t)pte_to_pa(*ptp); + + mp_enable_preemption(); + *(int *) DMAP1 = INTEL_PTE_VALID | INTEL_PTE_RW | (src & PG_FRAME) | INTEL_PTE_REF | INTEL_PTE_MOD; #if defined(I386_CPU) @@ -568,7 +606,7 @@ db_write_bytes( if (addr >= VM_MIN_KERNEL_ADDRESS && addr <= (vm_offset_t)&etext) { - ptep0 = pmap_pte(kernel_pmap, addr); + ptep0 = pmap_pte(kernel_pmap, (vm_map_offset_t)addr); oldmap0 = *ptep0; *ptep0 |= INTEL_PTE_WRITE; @@ -576,7 +614,7 @@ db_write_bytes( if (i386_trunc_page(addr) !=
addr1) { /* data crosses a page boundary */ - ptep1 = pmap_pte(kernel_pmap, addr1); + ptep1 = pmap_pte(kernel_pmap, (vm_map_offset_t)addr1); oldmap1 = *ptep1; *ptep1 |= INTEL_PTE_WRITE; } @@ -754,24 +792,6 @@ db_task_name( db_printf(" "); } -/* - * Code used to synchronize kdb among all cpus, one active at a time, switch - * from on to another using kdb_on! #cpu or cpu #cpu - */ - -decl_simple_lock_data(, kdb_lock) /* kdb lock */ - -#define db_simple_lock_init(l, e) hw_lock_init(&((l)->interlock)) -#define db_simple_lock_try(l) hw_lock_try(&((l)->interlock)) -#define db_simple_unlock(l) hw_lock_unlock(&((l)->interlock)) - -int kdb_cpu = -1; /* current cpu running kdb */ -int kdb_debug = 0; -volatile unsigned int cpus_holding_bkpts; /* counter for number of cpus holding - breakpoints (ie: cpus that did not - insert back breakpoints) */ -extern boolean_t db_breakpoints_inserted; - void db_machdep_init(void) { @@ -779,16 +799,14 @@ db_machdep_init(void) db_simple_lock_init(&kdb_lock, 0); for (c = 0; c < real_ncpus; ++c) { - db_stacks[c] = (vm_offset_t) (db_stack_store + - (INTSTACK_SIZE * (c + 1)) - sizeof (natural_t)); if (c == master_cpu) { - dbtss.esp0 = (int)(db_task_stack_store + + master_dbtss.esp0 = (int)(db_task_stack_store + (INTSTACK_SIZE * (c + 1)) - sizeof (natural_t)); - dbtss.esp = dbtss.esp0; - dbtss.eip = (int)&db_task_start; + master_dbtss.esp = master_dbtss.esp0; + master_dbtss.eip = (int)&db_task_start; /* * The TSS for the debugging task on each slave CPU - * is set up in mp_desc_init(). + * is set up in cpu_desc_init(). 
*/ } } @@ -805,12 +823,12 @@ db_machdep_init(void) int kdb_enter(int pc) { - int mycpu; + int my_cpu; int retval; disable_preemption(); - mycpu = cpu_number(); + my_cpu = cpu_number(); if (current_cpu_datap()->cpu_db_pass_thru) { retval = 0; @@ -818,16 +836,17 @@ kdb_enter(int pc) } current_cpu_datap()->cpu_kdb_active++; + lock_kdb(); - if (kdb_debug) - db_printf("kdb_enter: cpu %d, is_slave %d, kdb_cpu %d, run mode %d pc %x (%x) holds %d\n", - my_cpu, current_cpu_datap()->cpu_kdb_is_slave, kdb_cpu, - db_run_mode, pc, *(int *)pc, cpus_holding_bkpts); + db_printf("kdb_enter(): cpu_number %d, kdb_cpu %d\n", my_cpu, kdb_cpu); + if (db_breakpoints_inserted) cpus_holding_bkpts++; + if (kdb_cpu == -1 && !current_cpu_datap()->cpu_kdb_is_slave) { kdb_cpu = my_cpu; + db_printf("Signaling other processors..\n"); remote_kdb(); /* stop other cpus */ retval = 1; } else if (kdb_cpu == my_cpu) @@ -868,6 +887,8 @@ kdb_leave(void) unlock_kdb(); current_cpu_datap()->cpu_kdb_active--; + mp_kdb_exit(); + enable_preemption(); if (wait) { @@ -880,14 +901,12 @@ lock_kdb(void) { int my_cpu; register i; - extern void kdb_console(void); disable_preemption(); my_cpu = cpu_number(); for(;;) { - kdb_console(); if (kdb_cpu != -1 && kdb_cpu != my_cpu) { continue; } @@ -982,21 +1001,17 @@ kdb_on( } } +/* + * system reboot + */ + +extern void kdp_reboot(void); + void db_reboot( db_expr_t addr, boolean_t have_addr, db_expr_t count, char *modif) { - boolean_t reboot = TRUE; - char *cp, c; - - cp = modif; - while ((c = *cp++) != 0) { - if (c == 'r') /* reboot */ - reboot = TRUE; - if (c == 'h') /* halt */ - reboot = FALSE; - } - halt_all_cpus(reboot); + kdp_reboot(); } diff --git a/osfmk/i386/db_machdep.h b/osfmk/i386/db_machdep.h index 147140acd..dbcf9eb43 100644 --- a/osfmk/i386/db_machdep.h +++ b/osfmk/i386/db_machdep.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved. 
* * @APPLE_LICENSE_HEADER_START@ * @@ -63,11 +63,13 @@ #include /* for thread_status */ #include #include +#include +#include -typedef vm_offset_t db_addr_t; /* address - unsigned */ -typedef int db_expr_t; /* expression - signed */ +typedef addr64_t db_addr_t; /* address - unsigned */ +typedef uint64_t db_expr_t; /* expression */ -typedef struct i386_saved_state db_regs_t; +typedef struct x86_saved_state32 db_regs_t; db_regs_t ddb_regs; /* register state */ #define DDB_REGS (&ddb_regs) extern int db_active; /* ddb is active */ @@ -107,9 +109,7 @@ int db_inst_store(unsigned long); db_check_access(addr,size,task) #define DB_PHYS_EQ(task1,addr1,task2,addr2) \ db_phys_eq(task1,addr1,task2,addr2) -#define DB_VALID_KERN_ADDR(addr) \ - ((addr) >= VM_MIN_KERNEL_ADDRESS && \ - (addr) < VM_MAX_KERNEL_ADDRESS) +#define DB_VALID_KERN_ADDR(addr) (1) #define DB_VALID_ADDRESS(addr,user) \ ((!(user) && DB_VALID_KERN_ADDR(addr)) || \ ((user) && (addr) < VM_MAX_ADDRESS)) @@ -154,6 +154,19 @@ extern void db_reboot( db_expr_t count, char *modif); +extern void db_display_kmod(db_expr_t addr, int have_addr, db_expr_t count, char * modif); +extern void db_display_real(db_expr_t addr, int have_addr, db_expr_t count, char * modif); +extern void db_display_iokit(db_expr_t addr, int have_addr, db_expr_t count, char * modif); +extern void db_cpuid(db_expr_t addr, int have_addr, db_expr_t count, char * modif); +extern void db_msr(db_expr_t addr, int have_addr, db_expr_t count, char * modif); +extern void db_apic(db_expr_t addr, int have_addr, db_expr_t count, char * modif); +extern void db_test(db_expr_t addr, int have_addr, db_expr_t count, char * modif); +extern void db_intcnt(db_expr_t addr, int have_addr, db_expr_t count, char * modif); +extern void db_display_hpet(hpetReg_t *hpt); +extern void db_hpet(db_expr_t addr, int have_addr, db_expr_t count, char * modif); +extern void db_cfg(db_expr_t addr, int have_addr, db_expr_t count, char * modif); +extern void db_dtimers(db_expr_t addr, int 
have_addr, db_expr_t count, char * modif); + /* macros for printing OS server dependent task name */ #define DB_TASK_NAME(task) db_task_name(task) @@ -166,21 +179,26 @@ extern void db_task_name( /* macro for checking if a thread has used floating-point */ -#define db_act_fp_used(act) (act && act->machine.pcb->ims.ifps) +#define db_act_fp_used(act) (act && act->machine.pcb->ifps) extern void db_tss_to_frame( int tss_sel, - struct i386_saved_state *regs); + x86_saved_state32_t *regs); extern int kdb_trap( int type, int code, - struct i386_saved_state *regs); + x86_saved_state32_t *regs); extern boolean_t db_trap_from_asm( - struct i386_saved_state *regs); + x86_saved_state32_t *regs); extern int dr6(void); extern void kdb_on( int cpu); -extern void cnpollc( - boolean_t on); + +#if MACH_KDB +extern void db_getpmgr(pmData_t *pmj); +extern void db_chkpmgr(void); +#endif /* MACH_KDB */ +extern void db_pmgr(db_expr_t addr, int have_addr, db_expr_t count, char * modif); +extern void db_nap(db_expr_t addr, int have_addr, db_expr_t count, char * modif); #endif /* _I386_DB_MACHDEP_H_ */ diff --git a/osfmk/i386/db_trace.c b/osfmk/i386/db_trace.c index b5ef0bc7d..49d6a465e 100644 --- a/osfmk/i386/db_trace.c +++ b/osfmk/i386/db_trace.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -61,6 +61,13 @@ #include #include #include +#include + +#include +#include +#include +#include +#include #include #include @@ -70,29 +77,17 @@ #include extern jmp_buf_t *db_recover; -extern struct i386_saved_state *saved_state[]; +struct x86_kernel_state32 ddb_null_kregs; +extern kmod_info_t *kmod; -struct i386_kernel_state ddb_null_kregs; /* * Stack trace. 
*/ -extern vm_offset_t vm_min_inks_addr; /* set by db_clone_symtabXXX */ -#define INKSERVER(va) (((vm_offset_t)(va)) >= vm_min_inks_addr) - -extern vm_offset_t interrupt_stack[]; -#define ININTSTACK(va) \ - (((vm_offset_t)(va)) >= interrupt_stack[cpu_number()] &&\ - (((vm_offset_t)(va)) < interrupt_stack[cpu_number()] + \ - INTSTACK_SIZE)) +#define INKERNELSTACK(va, th) 1 -#define INKERNELSTACK(va, th) \ - (th == THREAD_NULL || \ - (((vm_offset_t)(va)) >= th->thread->kernel_stack && \ - (((vm_offset_t)(va)) < th->thread->kernel_stack + \ - KERNEL_STACK_SIZE)) || \ - ININTSTACK(va)) +#define DB_NUMARGS_MAX 5 struct i386_frame { struct i386_frame *f_frame; @@ -113,20 +108,20 @@ boolean_t db_trace_symbols_found = FALSE; struct i386_kregs { char *name; - int offset; + unsigned int offset; } i386_kregs[] = { - { "ebx", (int)(&((struct i386_kernel_state *)0)->k_ebx) }, - { "esp", (int)(&((struct i386_kernel_state *)0)->k_esp) }, - { "ebp", (int)(&((struct i386_kernel_state *)0)->k_ebp) }, - { "edi", (int)(&((struct i386_kernel_state *)0)->k_edi) }, - { "esi", (int)(&((struct i386_kernel_state *)0)->k_esi) }, - { "eip", (int)(&((struct i386_kernel_state *)0)->k_eip) }, - { 0 }, + { "ebx", (unsigned int)(&((struct x86_kernel_state32 *)0)->k_ebx) }, + { "esp", (unsigned int)(&((struct x86_kernel_state32 *)0)->k_esp) }, + { "ebp", (unsigned int)(&((struct x86_kernel_state32 *)0)->k_ebp) }, + { "edi", (unsigned int)(&((struct x86_kernel_state32 *)0)->k_edi) }, + { "esi", (unsigned int)(&((struct x86_kernel_state32 *)0)->k_esi) }, + { "eip", (unsigned int)(&((struct x86_kernel_state32 *)0)->k_eip) }, + { 0 } }; /* Forward */ -extern int * db_lookup_i386_kreg( +extern unsigned int * db_lookup_i386_kreg( char *name, int *kregp); extern int db_i386_reg_value( @@ -151,26 +146,26 @@ extern int _setjmp( * Machine register set. 
*/ struct db_variable db_regs[] = { - { "cs", (int *)&ddb_regs.cs, db_i386_reg_value, 0, 0, 0, 0, TRUE }, - { "ds", (int *)&ddb_regs.ds, db_i386_reg_value, 0, 0, 0, 0, TRUE }, - { "es", (int *)&ddb_regs.es, db_i386_reg_value, 0, 0, 0, 0, TRUE }, - { "fs", (int *)&ddb_regs.fs, db_i386_reg_value, 0, 0, 0, 0, TRUE }, - { "gs", (int *)&ddb_regs.gs, db_i386_reg_value, 0, 0, 0, 0, TRUE }, - { "ss", (int *)&ddb_regs.ss, db_i386_reg_value, 0, 0, 0, 0, TRUE }, - { "eax",(int *)&ddb_regs.eax, db_i386_reg_value, 0, 0, 0, 0, TRUE }, - { "ecx",(int *)&ddb_regs.ecx, db_i386_reg_value, 0, 0, 0, 0, TRUE }, - { "edx",(int *)&ddb_regs.edx, db_i386_reg_value, 0, 0, 0, 0, TRUE }, - { "ebx",(int *)&ddb_regs.ebx, db_i386_reg_value, 0, 0, 0, 0, TRUE }, - { "esp",(int *)&ddb_regs.uesp,db_i386_reg_value, 0, 0, 0, 0, TRUE }, - { "ebp",(int *)&ddb_regs.ebp, db_i386_reg_value, 0, 0, 0, 0, TRUE }, - { "esi",(int *)&ddb_regs.esi, db_i386_reg_value, 0, 0, 0, 0, TRUE }, - { "edi",(int *)&ddb_regs.edi, db_i386_reg_value, 0, 0, 0, 0, TRUE }, - { "eip",(int *)&ddb_regs.eip, db_i386_reg_value, 0, 0, 0, 0, TRUE }, - { "efl",(int *)&ddb_regs.efl, db_i386_reg_value, 0, 0, 0, 0, TRUE }, + { "cs", (unsigned int *)&ddb_regs.cs, db_i386_reg_value, 0, 0, 0, 0, TRUE, 0, 0, (int *)0, 0 }, + { "ds", (unsigned int *)&ddb_regs.ds, db_i386_reg_value, 0, 0, 0, 0, TRUE, 0, 0, (int *)0, 0 }, + { "es", (unsigned int *)&ddb_regs.es, db_i386_reg_value, 0, 0, 0, 0, TRUE, 0, 0, (int *)0, 0 }, + { "fs", (unsigned int *)&ddb_regs.fs, db_i386_reg_value, 0, 0, 0, 0, TRUE, 0, 0, (int *)0, 0 }, + { "gs", (unsigned int *)&ddb_regs.gs, db_i386_reg_value, 0, 0, 0, 0, TRUE, 0, 0, (int *)0, 0 }, + { "ss", (unsigned int *)&ddb_regs.ss, db_i386_reg_value, 0, 0, 0, 0, TRUE, 0, 0, (int *)0, 0 }, + { "eax",(unsigned int *)&ddb_regs.eax, db_i386_reg_value, 0, 0, 0, 0, TRUE, 0, 0, (int *)0, 0 }, + { "ecx",(unsigned int *)&ddb_regs.ecx, db_i386_reg_value, 0, 0, 0, 0, TRUE, 0, 0, (int *)0, 0 }, + { "edx",(unsigned int *)&ddb_regs.edx, 
db_i386_reg_value, 0, 0, 0, 0, TRUE, 0, 0, (int *)0, 0 }, + { "ebx",(unsigned int *)&ddb_regs.ebx, db_i386_reg_value, 0, 0, 0, 0, TRUE, 0, 0, (int *)0, 0 }, + { "esp",(unsigned int *)&ddb_regs.uesp,db_i386_reg_value, 0, 0, 0, 0, TRUE, 0, 0, (int *)0, 0 }, + { "ebp",(unsigned int *)&ddb_regs.ebp, db_i386_reg_value, 0, 0, 0, 0, TRUE, 0, 0, (int *)0, 0 }, + { "esi",(unsigned int *)&ddb_regs.esi, db_i386_reg_value, 0, 0, 0, 0, TRUE, 0, 0, (int *)0, 0 }, + { "edi",(unsigned int *)&ddb_regs.edi, db_i386_reg_value, 0, 0, 0, 0, TRUE, 0, 0, (int *)0, 0 }, + { "eip",(unsigned int *)&ddb_regs.eip, db_i386_reg_value, 0, 0, 0, 0, TRUE, 0, 0, (int *)0, 0 }, + { "efl",(unsigned int *)&ddb_regs.efl, db_i386_reg_value, 0, 0, 0, 0, TRUE, 0, 0, (int *)0, 0 } }; struct db_variable *db_eregs = db_regs + sizeof(db_regs)/sizeof(db_regs[0]); -int * +unsigned int * db_lookup_i386_kreg( char *name, int *kregp) @@ -179,7 +174,7 @@ db_lookup_i386_kreg( for (kp = i386_kregs; kp->name; kp++) { if (strcmp(name, kp->name) == 0) - return((int *)((int)kregp + kp->offset)); + return((unsigned int *)((int)kregp + kp->offset)); } return(0); } @@ -192,11 +187,9 @@ db_i386_reg_value( db_var_aux_param_t ap) { extern char etext; - int *dp = 0; + unsigned int *dp = 0; db_expr_t null_reg = 0; register thread_t thr_act = ap->thr_act; - extern unsigned int_stack_high; - int cpu; if (db_option(ap->modif, 'u')) { if (thr_act == THREAD_NULL) { @@ -206,38 +199,36 @@ db_i386_reg_value( if (thr_act == current_thread()) { if (IS_USER_TRAP(&ddb_regs, &etext)) dp = vp->valuep; - else if (ddb_regs.ebp < int_stack_high) - db_error("cannot get/set user registers in nested interrupt\n"); } } else { if (thr_act == THREAD_NULL || thr_act == current_thread()) { dp = vp->valuep; } else { - if (thr_act->thread && - !(thr_act->thread->state & TH_STACK_HANDOFF) && - thr_act->thread->kernel_stack) { + if (thr_act && + (thr_act->continuation != THREAD_CONTINUE_NULL) && + thr_act->kernel_stack) { int cpu; for (cpu = 0; cpu < 
real_ncpus; cpu++) { if (cpu_datap(cpu)->cpu_running == TRUE && - cpu_datap(cpu)->cpu_active_thread == thr_act->thread && saved_state[cpu]) { - dp = (int *) (((int)saved_state[cpu]) + - (((int) vp->valuep) - - (int) &ddb_regs)); + cpu_datap(cpu)->cpu_active_thread == thr_act && cpu_datap(cpu)->cpu_kdb_saved_state) { + dp = (unsigned int *) (((unsigned int)cpu_datap(cpu)->cpu_kdb_saved_state) + + (((unsigned int) vp->valuep) - + (unsigned int) &ddb_regs)); break; } } - if (dp == 0 && thr_act && thr_act->thread) + if (dp == 0 && thr_act) dp = db_lookup_i386_kreg(vp->name, - (int *)(STACK_IKS(thr_act->thread->kernel_stack))); + (unsigned int *)(STACK_IKS(thr_act->kernel_stack))); if (dp == 0) dp = &null_reg; - } else if (thr_act->thread && - (thr_act->thread->state&TH_STACK_HANDOFF)){ - /* only EIP is valid */ - if (vp->valuep == (int *) &ddb_regs.eip) { - dp = (int *)(&thr_act->thread->continuation); - } else { + } else if (thr_act && + (thr_act->continuation != THREAD_CONTINUE_NULL)) { + /* only EIP is valid */ + if (vp->valuep == (unsigned int *) &ddb_regs.eip) { + dp = (unsigned int *)(&thr_act->continuation); + } else { dp = &null_reg; } } @@ -249,10 +240,10 @@ db_i386_reg_value( if (!db_option(ap->modif, 'u')) { for (cpu = 0; cpu < real_ncpus; cpu++) { if (cpu_datap(cpu)->cpu_running == TRUE && - cpu_datap(cpu)->cpu_active_thread == thr_act->thread && saved_state[cpu]) { - dp = (int *) (((int)saved_state[cpu]) + - (((int) vp->valuep) - - (int) &ddb_regs)); + cpu_datap(cpu)->cpu_active_thread == thr_act && cpu_datap(cpu)->cpu_kdb_saved_state) { + dp = (unsigned int *) (((unsigned int)cpu_datap(cpu)->cpu_kdb_saved_state) + + (((unsigned int) vp->valuep) - + (unsigned int) &ddb_regs)); break; } } @@ -260,8 +251,8 @@ db_i386_reg_value( if (dp == 0) { if (!thr_act || thr_act->machine.pcb == 0) db_error("no pcb\n"); - dp = (int *)((int)(&thr_act->machine.pcb->iss) + - ((int)vp->valuep - (int)&ddb_regs)); + dp = (unsigned int *)((unsigned 
int)(thr_act->machine.pcb->iss) + + ((unsigned int)vp->valuep - (unsigned int)&ddb_regs)); } } if (flag == DB_VAR_SET) @@ -366,33 +357,33 @@ db_nextframe( int frame_type, /* in */ thread_t thr_act) /* in */ { + x86_saved_state32_t *iss32; extern char * trap_type[]; extern int TRAP_TYPES; - struct i386_saved_state *saved_regs; struct interrupt_frame *ifp; - struct i386_interrupt_state *isp; task_t task = (thr_act != THREAD_NULL)? thr_act->task: TASK_NULL; switch(frame_type) { case TRAP: /* * We know that trap() has 1 argument and we know that - * it is an (strcut i386_saved_state *). + * it is an (x86_saved_state32_t *). */ - saved_regs = (struct i386_saved_state *) - db_get_task_value((int)&((*fp)->f_arg0),4,FALSE,task); - if (saved_regs->trapno >= 0 && saved_regs->trapno < TRAP_TYPES) { - db_printf(">>>>> %s trap at ", - trap_type[saved_regs->trapno]); + iss32 = (x86_saved_state32_t *) + db_get_task_value((int)&((*fp)->f_arg0),4,FALSE,task); + + if (iss32->trapno >= 0 && iss32->trapno < TRAP_TYPES) { + db_printf(">>>>> %s trap at ", + trap_type[iss32->trapno]); } else { - db_printf(">>>>> trap (number %d) at ", - saved_regs->trapno & 0xffff); + db_printf(">>>>> trap (number %d) at ", + iss32->trapno & 0xffff); } - db_task_printsym(saved_regs->eip, DB_STGY_PROC, task); + db_task_printsym(iss32->eip, DB_STGY_PROC, task); db_printf(" <<<<<\n"); - *fp = (struct i386_frame *)saved_regs->ebp; - *ip = (db_addr_t)saved_regs->eip; + *fp = (struct i386_frame *)iss32->ebp; + *ip = (db_addr_t)iss32->eip; break; case INTERRUPT: if (*lfp == 0) { @@ -402,19 +393,21 @@ db_nextframe( db_printf(">>>>> interrupt at "); ifp = (struct interrupt_frame *)(*lfp); *fp = ifp->if_frame; - if (ifp->if_iretaddr == db_return_to_iret_symbol_value) - *ip = ((struct i386_interrupt_state *) ifp->if_edx)->eip; - else - *ip = (db_addr_t) ifp->if_eip; + if (ifp->if_iretaddr == db_return_to_iret_symbol_value) { + *ip = ((x86_saved_state32_t *) ifp->if_edx)->eip; + } else + *ip = (db_addr_t) 
ifp->if_eip; db_task_printsym(*ip, DB_STGY_PROC, task); db_printf(" <<<<<\n"); break; case SYSCALL: if (thr_act != THREAD_NULL && thr_act->machine.pcb) { - *ip = (db_addr_t) thr_act->machine.pcb->iss.eip; - *fp = (struct i386_frame *) thr_act->machine.pcb->iss.ebp; - break; + iss32 = (x86_saved_state32_t *)thr_act->machine.pcb->iss; + + *ip = (db_addr_t)(iss32->eip); + *fp = (struct i386_frame *)(iss32->ebp); } + break; /* falling down for unknown case */ default: miss_frame: @@ -435,6 +428,7 @@ db_stack_trace_cmd( char *modif) { struct i386_frame *frame, *lastframe; + x86_saved_state32_t *iss32; int *argp; db_addr_t callpc, lastcallpc; int frame_type; @@ -475,17 +469,17 @@ db_stack_trace_cmd( if (!have_addr && !trace_thread) { have_addr = TRUE; trace_thread = TRUE; - act_list = &(current_task()->thr_acts); + act_list = &(current_task()->threads); addr = (db_expr_t) queue_first(act_list); } else if (trace_thread) { if (have_addr) { if (!db_check_act_address_valid((thread_t)addr)) { if (db_lookup_task((task_t)addr) == -1) return; - act_list = &(((task_t)addr)->thr_acts); + act_list = &(((task_t)addr)->threads); addr = (db_expr_t) queue_first(act_list); } else { - act_list = &(((thread_t)addr)->task->thr_acts); + act_list = &(((thread_t)addr)->task->threads); thcount = db_lookup_task_act(((thread_t)addr)->task, (thread_t)addr); } @@ -498,7 +492,7 @@ db_stack_trace_cmd( return; } have_addr = TRUE; - act_list = &th->task->thr_acts; + act_list = &th->task->threads; addr = (db_expr_t) queue_first(act_list); } } @@ -518,11 +512,13 @@ db_stack_trace_cmd( callpc = (db_addr_t)ddb_regs.eip; th = current_thread(); task = (th != THREAD_NULL)? 
th->task: TASK_NULL; + db_printf("thread 0x%x, current_thread() is 0x%x, ebp is 0x%x, eip is 0x%x\n", th, current_thread(), ddb_regs.ebp, ddb_regs.eip); } else if (trace_thread) { if (have_addr) { th = (thread_t) addr; - if (!db_check_act_address_valid(th)) - return; + if (!db_check_act_address_valid(th)) { + return; + } } else { th = db_default_act; if (th == THREAD_NULL) @@ -534,11 +530,11 @@ db_stack_trace_cmd( } if (trace_all_threads) db_printf("---------- Thread 0x%x (#%d of %d) ----------\n", - addr, thcount, th->task->thr_act_count); + addr, thcount, th->task->thread_count); next_activation: user_frame = 0; - +// kprintf("th is %x, current_thread() is %x, ddb_regs.ebp is %x ddb_regs.eip is %x\n", th, current_thread(), ddb_regs.ebp, ddb_regs.eip); task = th->task; if (th == current_thread()) { frame = (struct i386_frame *)ddb_regs.ebp; @@ -548,36 +544,31 @@ db_stack_trace_cmd( db_printf("thread has no pcb\n"); return; } - if (!th->thread) { - register struct i386_saved_state *iss = - &th->machine.pcb->iss; - + if (!th) { db_printf("thread has no shuttle\n"); -#if 0 - frame = (struct i386_frame *) (iss->ebp); - callpc = (db_addr_t) (iss->eip); -#else + goto thread_done; -#endif } - else if ((th->thread->state & TH_STACK_HANDOFF) || - th->thread->kernel_stack == 0) { - register struct i386_saved_state *iss = - &th->machine.pcb->iss; + else if ( (th->continuation != THREAD_CONTINUE_NULL) || + th->kernel_stack == 0) { db_printf("Continuation "); - db_task_printsym((db_expr_t)th->thread->continuation, + db_task_printsym((db_expr_t)th->continuation, DB_STGY_PROC, task); db_printf("\n"); - frame = (struct i386_frame *) (iss->ebp); - callpc = (db_addr_t) (iss->eip); + + iss32 = (x86_saved_state32_t *)th->machine.pcb->iss; + + frame = (struct i386_frame *) (iss32->ebp); + callpc = (db_addr_t) (iss32->eip); + } else { int cpu; for (cpu = 0; cpu < real_ncpus; cpu++) { if (cpu_datap(cpu)->cpu_running == TRUE && - cpu_datap(cpu)->cpu_active_thread == th->thread && - 
saved_state[cpu]) { + cpu_datap(cpu)->cpu_active_thread == th && + cpu_datap(cpu)->cpu_kdb_saved_state) { break; } } @@ -587,16 +578,16 @@ db_stack_trace_cmd( * which is not the top_most one in the RPC chain: * use the activation's pcb. */ - register struct i386_saved_state *iss = - &th->machine.pcb->iss; - frame = (struct i386_frame *) (iss->ebp); - callpc = (db_addr_t) (iss->eip); + iss32 = (x86_saved_state32_t *)th->machine.pcb->iss; + + frame = (struct i386_frame *) (iss32->ebp); + callpc = (db_addr_t) (iss32->eip); } else { - if (cpu == NCPUS) { - register struct i386_kernel_state *iks; + if (cpu == real_ncpus) { + register struct x86_kernel_state32 *iks; int r; - iks = STACK_IKS(th->thread->kernel_stack); + iks = STACK_IKS(th->kernel_stack); prev = db_recover; if ((r = _setjmp(db_recover = &db_jmp_buf)) == 0) { frame = (struct i386_frame *) (iks->k_ebp); @@ -618,9 +609,11 @@ db_stack_trace_cmd( } else { db_printf(">>>>> active on cpu %d <<<<<\n", cpu); - frame = (struct i386_frame *) - saved_state[cpu]->ebp; - callpc = (db_addr_t) saved_state[cpu]->eip; + + iss32 = (x86_saved_state32_t *)cpu_datap(cpu)->cpu_kdb_saved_state; + + frame = (struct i386_frame *) (iss32->ebp); + callpc = (db_addr_t) (iss32->eip); } } } @@ -640,19 +633,18 @@ db_stack_trace_cmd( if (kernel_only) goto thread_done; user_frame++; - } else if (INKSERVER(callpc) && INKSERVER(frame)) { - db_printf(">>>>> INKserver space <<<<<\n"); } lastframe = 0; lastcallpc = (db_addr_t) 0; while (frame_count-- && frame != 0) { - int narg; + int narg = DB_NUMARGS_MAX; char * name; db_expr_t offset; db_addr_t call_func = 0; int r; - + db_addr_t off; + db_symbol_values(NULL, db_search_task_symbol_and_line( callpc, @@ -663,15 +655,22 @@ db_stack_trace_cmd( (user_frame) ? task : 0, &narg), &name, (db_expr_t *)&call_func); + if ( name == NULL) { + db_find_task_sym_and_offset(callpc, + &name, &off, (user_frame) ? 
task : 0); + offset = (db_expr_t) off; + } + if (user_frame == 0) { - if (call_func == db_user_trap_symbol_value || + if (call_func && call_func == db_user_trap_symbol_value || call_func == db_kernel_trap_symbol_value) { frame_type = TRAP; narg = 1; - } else if (call_func == db_interrupt_symbol_value) { + } else if (call_func && + call_func == db_interrupt_symbol_value) { frame_type = INTERRUPT; goto next_frame; - } else if (call_func == db_syscall_symbol_value) { + } else if (call_func && call_func == db_syscall_symbol_value) { frame_type = SYSCALL; goto next_frame; } else { @@ -759,10 +758,10 @@ db_stack_trace_cmd( (user_frame) ? th : THREAD_NULL); if (frame == 0) { - if (th->lower != THREAD_NULL) { + if (th->task_threads.prev != THREAD_NULL) { if (top_act == THREAD_NULL) top_act = th; - th = th->lower; + th = th->task_threads.prev; db_printf(">>>>> next activation 0x%x ($task%d.%d) <<<<<\n", th, db_lookup_task(th->task), @@ -779,14 +778,10 @@ db_stack_trace_cmd( db_printf(">>>>> user space <<<<<\n"); if (kernel_only) break; - } else if ((!INKSERVER(lastframe) || !INKSERVER(lastcallpc)) && - (INKSERVER(callpc) && INKSERVER(frame))) { - db_printf(">>>>> inkserver space <<<<<\n"); } if (frame <= lastframe) { if ((INKERNELSTACK(lastframe, th) && - !INKERNELSTACK(frame, th)) || - (INKSERVER(lastframe) ^ INKSERVER(frame))) + !INKERNELSTACK(frame, th))) continue; db_printf("Bad frame pointer: 0x%x\n", frame); break; @@ -797,7 +792,7 @@ db_stack_trace_cmd( if (trace_all_threads) { if (top_act != THREAD_NULL) th = top_act; - th = (thread_t) queue_next(&th->thr_acts); + th = (thread_t) queue_next(&th->task_threads); if (! 
queue_end(act_list, (queue_entry_t) th)) { db_printf("\n"); addr = (db_expr_t) th; @@ -807,3 +802,63 @@ db_stack_trace_cmd( } } } + +extern int kdp_vm_read(caddr_t, caddr_t, unsigned int ); +extern boolean_t kdp_trans_off; +/* + * Print out 256 bytes of real storage + * + * dr [entaddr] + */ +void db_display_real(db_expr_t addr, __unused int have_addr, __unused db_expr_t count, __unused char * modif) { + + int i; + unsigned int xbuf[8]; + unsigned read_result = 0; +/* Print 256 bytes */ + for(i=0; i<8; i++) { + +/* Do a physical read using kdp_vm_read(), rather than replicating the same + * facility + */ + kdp_trans_off = 1; + read_result = kdp_vm_read(addr, &xbuf[0], 32); + kdp_trans_off = 0; + + if (read_result != 32) + db_printf("Unable to read address\n"); + else + db_printf("%016llX %08X %08X %08X %08X %08X %08X %08X %08X\n", addr, /* Print a line */ + xbuf[0], xbuf[1], xbuf[2], xbuf[3], + xbuf[4], xbuf[5], xbuf[6], xbuf[7]); + addr = addr + 0x00000020; /* Point to next address */ + } + db_next = addr; +} + +/* + * Displays all of the kmods in the system. + * + * dk + */ +void +db_display_kmod(__unused db_expr_t addr, __unused int have_addr, __unused db_expr_t count, __unused char *modif) +{ + + kmod_info_t *kmd; + unsigned int strt, end; + + kmd = kmod; /* Start at the start */ + + db_printf("info addr start - end name ver\n"); + + while (kmd) { /* Dump 'em all */ + strt = (unsigned int) kmd->address + kmd->hdr_size; + end = (unsigned int) kmd->address + kmd->size; + db_printf("%08X %08X %08X - %08X: %s, %s\n", + kmd, kmd->address, strt, end, kmd->name, kmd->version); + kmd = kmd->next; + } + + return; +} diff --git a/osfmk/i386/eflags.h b/osfmk/i386/eflags.h index bb5d56d4a..64e200fa7 100644 --- a/osfmk/i386/eflags.h +++ b/osfmk/i386/eflags.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved. 
* * @APPLE_LICENSE_HEADER_START@ * @@ -57,7 +57,7 @@ * i386 flags register */ -#ifndef EFL_CF /* FIXME - this is pulled from mach/i386/eflags.h */ +#ifndef EFL_CF #define EFL_CF 0x00000001 /* carry */ #define EFL_PF 0x00000004 /* parity of low 8 bits */ #define EFL_AF 0x00000010 /* carry out of bit 3 */ @@ -79,6 +79,9 @@ #define EFL_ID 0x00200000 /* cpuID instruction */ #endif +#define EFL_CLR 0xfff88028 +#define EFL_SET 0x00000002 + #define EFL_USER_SET (EFL_IF) #define EFL_USER_CLEAR (EFL_IOPL|EFL_NT|EFL_RF) diff --git a/osfmk/i386/fpu.c b/osfmk/i386/fpu.c index 30de466be..df88486cb 100644 --- a/osfmk/i386/fpu.c +++ b/osfmk/i386/fpu.c @@ -67,32 +67,14 @@ #include #include #include -#include +#include #include #include +#include -#if 0 -#include -extern int curr_ipl; -#define ASSERT_IPL(L) \ -{ \ - if (curr_ipl != L) { \ - printf("IPL is %d, expected %d\n", curr_ipl, L); \ - panic("fpu: wrong ipl"); \ - } \ -} -#else -#define ASSERT_IPL(L) -#endif - -int fp_kind = FP_387; /* 80387 present */ +int fp_kind = FP_NO; /* not inited */ zone_t ifps_zone; /* zone for FPU save area */ -#define clear_fpu() \ - { \ - set_ts(); \ - } - #define ALIGNED(addr,size) (((unsigned)(addr)&((size)-1))==0) /* Forward */ @@ -103,6 +85,63 @@ extern void fp_save( extern void fp_load( thread_t thr_act); +static void configure_mxcsr_capability_mask(struct x86_fpsave_state *ifps); + +struct x86_fpsave_state starting_fp_state; + + +/* Global MXCSR capability bitmask */ +static unsigned int mxcsr_capability_mask; + +/* + * Determine the MXCSR capability mask, which allows us to mask off any + * potentially unsafe "reserved" bits before restoring the FPU context. + * *Not* per-cpu, assumes symmetry. 
+ */ +static void +configure_mxcsr_capability_mask(struct x86_fpsave_state *ifps) +{ + /* FXSAVE requires a 16 byte aligned store */ + assert(ALIGNED(ifps,16)); + /* Clear, to prepare for the diagnostic FXSAVE */ + bzero(ifps, sizeof(*ifps)); + /* Disable FPU/SSE Device Not Available exceptions */ + clear_ts(); + + __asm__ volatile("fxsave %0" : "=m" (ifps->fx_save_state)); + mxcsr_capability_mask = ifps->fx_save_state.fx_MXCSR_MASK; + + /* Set default mask value if necessary */ + if (mxcsr_capability_mask == 0) + mxcsr_capability_mask = 0xffbf; + + /* Re-enable FPU/SSE DNA exceptions */ + set_ts(); +} + +/* + * Allocate and initialize FP state for current thread. + * Don't load state. + */ +static struct x86_fpsave_state * +fp_state_alloc(void) +{ + struct x86_fpsave_state *ifps; + + ifps = (struct x86_fpsave_state *)zalloc(ifps_zone); + assert(ALIGNED(ifps,16)); + bzero((char *)ifps, sizeof *ifps); + + return ifps; +} + +static inline void +fp_state_free(struct x86_fpsave_state *ifps) +{ + zfree(ifps_zone, ifps); +} + + /* * Look for FPU and initialize it. * Called on each CPU. 
@@ -126,32 +165,41 @@ init_fpu(void) if ((status & 0xff) == 0 && (control & 0x103f) == 0x3f) { - fp_kind = FP_387; /* assume we have a 387 compatible instruction set */ /* Use FPU save/restore instructions if available */ - if (cpuid_features() & CPUID_FEATURE_FXSR) { - fp_kind = FP_FXSR; - set_cr4(get_cr4() | CR4_FXS); - printf("Enabling XMM register save/restore"); - /* And allow SIMD instructions if present */ - if (cpuid_features() & CPUID_FEATURE_SSE) { - printf(" and SSE/SSE2"); - set_cr4(get_cr4() | CR4_XMM); - } - printf(" opcodes\n"); - } + if (cpuid_features() & CPUID_FEATURE_FXSR) { + fp_kind = FP_FXSR; + set_cr4(get_cr4() | CR4_FXS); + printf("Enabling XMM register save/restore"); + /* And allow SIMD instructions if present */ + if (cpuid_features() & CPUID_FEATURE_SSE) { + printf(" and SSE/SSE2"); + set_cr4(get_cr4() | CR4_XMM); + } + printf(" opcodes\n"); + } else + panic("fpu is not FP_FXSR"); /* - * Trap wait instructions. Turn off FPU for now. + * initialze FPU to normal starting + * position so that we can take a snapshot + * of that state and store it for future use + * when we're asked for the FPU state of a + * thread, and it hasn't initiated any yet */ - set_cr0(get_cr0() | CR0_TS | CR0_MP); + fpinit(); + fxsave(&starting_fp_state.fx_save_state); + + /* + * Trap wait instructions. Turn off FPU for now. + */ + set_cr0(get_cr0() | CR0_TS | CR0_MP); } else { /* * NO FPU. 
*/ - fp_kind = FP_NO; - set_cr0(get_cr0() | CR0_EM); + panic("fpu is not FP_FXSR"); } } @@ -161,10 +209,16 @@ init_fpu(void) void fpu_module_init(void) { - ifps_zone = zinit(sizeof(struct i386_fpsave_state), - THREAD_MAX * sizeof(struct i386_fpsave_state), - THREAD_CHUNK * sizeof(struct i386_fpsave_state), - "i386 fpsave state"); + struct x86_fpsave_state *new_ifps; + + ifps_zone = zinit(sizeof(struct x86_fpsave_state), + THREAD_MAX * sizeof(struct x86_fpsave_state), + THREAD_CHUNK * sizeof(struct x86_fpsave_state), + "x86 fpsave state"); + new_ifps = fp_state_alloc(); + /* Determine MXCSR reserved bits */ + configure_mxcsr_capability_mask(new_ifps); + fp_state_free(new_ifps); } /* @@ -173,10 +227,9 @@ fpu_module_init(void) */ void fpu_free(fps) - struct i386_fpsave_state *fps; + struct x86_fpsave_state *fps; { -ASSERT_IPL(SPL0); - zfree(ifps_zone, fps); + fp_state_free(fps); } /* @@ -190,71 +243,78 @@ ASSERT_IPL(SPL0); */ kern_return_t fpu_set_fxstate( - thread_t thr_act, - struct i386_float_state *state) + thread_t thr_act, + thread_state_t tstate) { - register pcb_t pcb; - register struct i386_fpsave_state *ifps; - register struct i386_fpsave_state *new_ifps; + struct x86_fpsave_state *ifps; + struct x86_fpsave_state *new_ifps; + x86_float_state64_t *state; + pcb_t pcb; -ASSERT_IPL(SPL0); if (fp_kind == FP_NO) - return KERN_FAILURE; + return KERN_FAILURE; + + state = (x86_float_state64_t *)tstate; - if (state->fpkind != FP_FXSR) { - /* strange if this happens, but in case someone builds one of these manually... */ - return fpu_set_state(thr_act, state); - } - assert(thr_act != THREAD_NULL); pcb = thr_act->machine.pcb; - if (state->initialized == 0) { - /* - * new FPU state is 'invalid'. - * Deallocate the fp state if it exists. - */ - simple_lock(&pcb->lock); - ifps = pcb->ims.ifps; - pcb->ims.ifps = 0; - simple_unlock(&pcb->lock); - - if (ifps != 0) { - zfree(ifps_zone, ifps); - } - } - else { - /* - * Valid state. Allocate the fp state if there is none. 
- */ + if (state == NULL) { + /* + * new FPU state is 'invalid'. + * Deallocate the fp state if it exists. + */ + simple_lock(&pcb->lock); + + ifps = pcb->ifps; + pcb->ifps = 0; - new_ifps = 0; + simple_unlock(&pcb->lock); + + if (ifps != 0) + fp_state_free(ifps); + } else { + /* + * Valid state. Allocate the fp state if there is none. + */ + new_ifps = 0; Retry: - simple_lock(&pcb->lock); - ifps = pcb->ims.ifps; - if (ifps == 0) { - if (new_ifps == 0) { - simple_unlock(&pcb->lock); - new_ifps = (struct i386_fpsave_state *) zalloc(ifps_zone); - assert(ALIGNED(new_ifps,16)); - goto Retry; + simple_lock(&pcb->lock); + + ifps = pcb->ifps; + if (ifps == 0) { + if (new_ifps == 0) { + simple_unlock(&pcb->lock); + new_ifps = fp_state_alloc(); + goto Retry; + } + ifps = new_ifps; + new_ifps = 0; + pcb->ifps = ifps; } - ifps = new_ifps; - new_ifps = 0; - bzero((char *)ifps, sizeof *ifps); - pcb->ims.ifps = ifps; - } + /* + * now copy over the new data. + */ + bcopy((char *)&state->fpu_fcw, + (char *)&ifps->fx_save_state, sizeof(struct x86_fx_save)); - /* - * now copy over the new data. - */ - bcopy((char *)&state->hw_state[0], (char *)&ifps->fx_save_state, sizeof(struct i386_fx_save)); - ifps->fp_save_flavor = FP_FXSR; - simple_unlock(&pcb->lock); - if (new_ifps != 0) - zfree(ifps_zone, ifps); - } + /* XXX The layout of the state set from user-space may need to be + * validated for consistency. + */ + ifps->fp_save_layout = thread_is_64bit(thr_act) ? FXSAVE64 : FXSAVE32; + /* Mark the thread's floating point status as non-live. */ + ifps->fp_valid = TRUE; + /* + * Clear any reserved bits in the MXCSR to prevent a GPF + * when issuing an FXRSTOR. 
+ */ + ifps->fx_save_state.fx_MXCSR &= mxcsr_capability_mask; + + simple_unlock(&pcb->lock); + if (new_ifps != 0) + fp_state_free(new_ifps); + } return KERN_SUCCESS; } @@ -266,234 +326,141 @@ ASSERT_IPL(SPL0); */ kern_return_t fpu_get_fxstate( - thread_t thr_act, - register struct i386_float_state *state) + thread_t thr_act, + thread_state_t tstate) { - register pcb_t pcb; - register struct i386_fpsave_state *ifps; + struct x86_fpsave_state *ifps; + x86_float_state64_t *state; + kern_return_t ret = KERN_FAILURE; + pcb_t pcb; -ASSERT_IPL(SPL0); - if (fp_kind == FP_NO) { + if (fp_kind == FP_NO) return KERN_FAILURE; - } else if (fp_kind == FP_387) { - return fpu_get_state(thr_act, state); - } + + state = (x86_float_state64_t *)tstate; assert(thr_act != THREAD_NULL); pcb = thr_act->machine.pcb; simple_lock(&pcb->lock); - ifps = pcb->ims.ifps; + + ifps = pcb->ifps; if (ifps == 0) { - /* - * No valid floating-point state. - */ - simple_unlock(&pcb->lock); - bzero((char *)state, sizeof(struct i386_float_state)); - return KERN_SUCCESS; - } + /* + * No valid floating-point state. + */ + bcopy((char *)&starting_fp_state.fx_save_state, + (char *)&state->fpu_fcw, sizeof(struct x86_fx_save)); - /* Make sure we`ve got the latest fp state info */ - /* If the live fpu state belongs to our target */ + simple_unlock(&pcb->lock); + + return KERN_SUCCESS; + } + /* + * Make sure we`ve got the latest fp state info + * If the live fpu state belongs to our target + */ if (thr_act == current_thread()) { - clear_ts(); - fp_save(thr_act); - clear_fpu(); - } - - state->fpkind = fp_kind; - state->exc_status = 0; - state->initialized = ifps->fp_valid; - bcopy( (char *)&ifps->fx_save_state, (char *)&state->hw_state[0], sizeof(struct i386_fx_save)); - - simple_unlock(&pcb->lock); - - return KERN_SUCCESS; -} - -/* - * Set the floating-point state for a thread. - * If the thread is not the current thread, it is - * not running (held). 
Locking needed against - * concurrent fpu_set_state or fpu_get_state. - */ -kern_return_t -fpu_set_state( - thread_t thr_act, - struct i386_float_state *state) -{ - register pcb_t pcb; - register struct i386_fpsave_state *ifps; - register struct i386_fpsave_state *new_ifps; + boolean_t intr; -ASSERT_IPL(SPL0); - if (fp_kind == FP_NO) - return KERN_FAILURE; + intr = ml_set_interrupts_enabled(FALSE); - assert(thr_act != THREAD_NULL); - pcb = thr_act->machine.pcb; + clear_ts(); + fp_save(thr_act); + clear_fpu(); - if (state->initialized == 0) { - /* - * new FPU state is 'invalid'. - * Deallocate the fp state if it exists. - */ - simple_lock(&pcb->lock); - ifps = pcb->ims.ifps; - pcb->ims.ifps = 0; - simple_unlock(&pcb->lock); - - if (ifps != 0) { - zfree(ifps_zone, ifps); - } + (void)ml_set_interrupts_enabled(intr); } - else { - /* - * Valid state. Allocate the fp state if there is none. - */ - register struct i386_fp_save *user_fp_state; - register struct i386_fp_regs *user_fp_regs; - - user_fp_state = (struct i386_fp_save *) &state->hw_state[0]; - user_fp_regs = (struct i386_fp_regs *) - &state->hw_state[sizeof(struct i386_fp_save)]; - - new_ifps = 0; - Retry: - simple_lock(&pcb->lock); - ifps = pcb->ims.ifps; - if (ifps == 0) { - if (new_ifps == 0) { - simple_unlock(&pcb->lock); - new_ifps = (struct i386_fpsave_state *) zalloc(ifps_zone); - assert(ALIGNED(new_ifps,16)); - goto Retry; - } - ifps = new_ifps; - new_ifps = 0; - bzero((char *)ifps, sizeof *ifps); // zero ALL fields first - pcb->ims.ifps = ifps; - } - - /* - * Ensure that reserved parts of the environment are 0. 
- */ - bzero((char *)&ifps->fp_save_state, sizeof(struct i386_fp_save)); - - ifps->fp_save_state.fp_control = user_fp_state->fp_control; - ifps->fp_save_state.fp_status = user_fp_state->fp_status; - ifps->fp_save_state.fp_tag = user_fp_state->fp_tag; - ifps->fp_save_state.fp_eip = user_fp_state->fp_eip; - ifps->fp_save_state.fp_cs = user_fp_state->fp_cs; - ifps->fp_save_state.fp_opcode = user_fp_state->fp_opcode; - ifps->fp_save_state.fp_dp = user_fp_state->fp_dp; - ifps->fp_save_state.fp_ds = user_fp_state->fp_ds; - ifps->fp_regs = *user_fp_regs; - ifps->fp_save_flavor = FP_387; - simple_unlock(&pcb->lock); - if (new_ifps != 0) - zfree(ifps_zone, ifps); + if (ifps->fp_valid) { + bcopy((char *)&ifps->fx_save_state, + (char *)&state->fpu_fcw, sizeof(struct x86_fx_save)); + ret = KERN_SUCCESS; } + simple_unlock(&pcb->lock); - return KERN_SUCCESS; + return ret; } + /* - * Get the floating-point state for a thread. - * If the thread is not the current thread, it is - * not running (held). Locking needed against - * concurrent fpu_set_state or fpu_get_state. + * the child thread is 'stopped' with the thread + * mutex held and is currently not known by anyone + * so no way for fpu state to get manipulated by an + * outside agency -> no need for pcb lock */ -kern_return_t -fpu_get_state( - thread_t thr_act, - register struct i386_float_state *state) + +void +fpu_dup_fxstate( + thread_t parent, + thread_t child) { - register pcb_t pcb; - register struct i386_fpsave_state *ifps; + struct x86_fpsave_state *new_ifps = NULL; + boolean_t intr; + pcb_t ppcb; -ASSERT_IPL(SPL0); - if (fp_kind == FP_NO) - return KERN_FAILURE; + ppcb = parent->machine.pcb; - assert(thr_act != THREAD_NULL); - pcb = thr_act->machine.pcb; + if (ppcb->ifps == NULL) + return; - simple_lock(&pcb->lock); - ifps = pcb->ims.ifps; - if (ifps == 0) { - /* - * No valid floating-point state. 
- */ - simple_unlock(&pcb->lock); - bzero((char *)state, sizeof(struct i386_float_state)); - return KERN_SUCCESS; - } + if (child->machine.pcb->ifps) + panic("fpu_dup_fxstate: child's ifps non-null"); - /* Make sure we`ve got the latest fp state info */ - /* If the live fpu state belongs to our target */ - if (thr_act == current_thread()) - { - clear_ts(); - fp_save(thr_act); - clear_fpu(); - } + new_ifps = fp_state_alloc(); - state->fpkind = fp_kind; - state->exc_status = 0; + simple_lock(&ppcb->lock); - { - register struct i386_fp_save *user_fp_state; - register struct i386_fp_regs *user_fp_regs; + if (ppcb->ifps != NULL) { + /* + * Make sure we`ve got the latest fp state info + */ + intr = ml_set_interrupts_enabled(FALSE); - state->initialized = ifps->fp_valid; + clear_ts(); + fp_save(parent); + clear_fpu(); - user_fp_state = (struct i386_fp_save *) &state->hw_state[0]; - user_fp_regs = (struct i386_fp_regs *) - &state->hw_state[sizeof(struct i386_fp_save)]; + (void)ml_set_interrupts_enabled(intr); - /* - * Ensure that reserved parts of the environment are 0. - */ - bzero((char *)user_fp_state, sizeof(struct i386_fp_save)); - - user_fp_state->fp_control = ifps->fp_save_state.fp_control; - user_fp_state->fp_status = ifps->fp_save_state.fp_status; - user_fp_state->fp_tag = ifps->fp_save_state.fp_tag; - user_fp_state->fp_eip = ifps->fp_save_state.fp_eip; - user_fp_state->fp_cs = ifps->fp_save_state.fp_cs; - user_fp_state->fp_opcode = ifps->fp_save_state.fp_opcode; - user_fp_state->fp_dp = ifps->fp_save_state.fp_dp; - user_fp_state->fp_ds = ifps->fp_save_state.fp_ds; - *user_fp_regs = ifps->fp_regs; + if (ppcb->ifps->fp_valid) { + child->machine.pcb->ifps = new_ifps; + + bcopy((char *)&(ppcb->ifps->fx_save_state), + (char *)&(child->machine.pcb->ifps->fx_save_state), sizeof(struct x86_fx_save)); + + new_ifps->fp_save_layout = ppcb->ifps->fp_save_layout; + /* Mark the new fp saved state as non-live. 
*/ + new_ifps->fp_valid = TRUE; + /* + * Clear any reserved bits in the MXCSR to prevent a GPF + * when issuing an FXRSTOR. + */ + new_ifps->fx_save_state.fx_MXCSR &= mxcsr_capability_mask; + new_ifps = NULL; + } } - simple_unlock(&pcb->lock); + simple_unlock(&ppcb->lock); - return KERN_SUCCESS; + if (new_ifps != NULL) + fp_state_free(new_ifps); } + /* * Initialize FPU. * - * Raise exceptions for: - * invalid operation - * divide by zero - * overflow - * - * Use 53-bit precision. */ void fpinit(void) { unsigned short control; -ASSERT_IPL(SPL0); clear_ts(); fninit(); fnstcw(&control); control &= ~(FPC_PC|FPC_RC); /* Clear precision & rounding control */ - control |= (FPC_PC_53 | /* Set precision */ + control |= (FPC_PC_64 | /* Set precision */ FPC_RC_RN | /* round-to-nearest */ FPC_ZE | /* Suppress zero-divide */ FPC_OE | /* and overflow */ @@ -502,6 +469,9 @@ ASSERT_IPL(SPL0); FPC_DE | /* Allow denorms as operands */ FPC_PE); /* No trap for precision loss */ fldcw(control); + + /* Initialize SSE/SSE2 */ + __builtin_ia32_ldmxcsr(0x1f80); } /* @@ -511,16 +481,27 @@ ASSERT_IPL(SPL0); void fpnoextflt(void) { - /* - * Enable FPU use. - */ -ASSERT_IPL(SPL0); - clear_ts(); + boolean_t intr; - /* - * Load this thread`s state into the FPU. - */ - fp_load(current_thread()); + intr = ml_set_interrupts_enabled(FALSE); + + clear_ts(); /* Enable FPU use */ + + if (get_interrupt_level()) { + /* + * Save current coprocessor context if valid + * Initialize coprocessor live context + */ + fp_save(current_thread()); + fpinit(); + } else { + /* + * Load this thread`s state into coprocessor live context. 
+ */ + fp_load(current_thread()); + } + + (void)ml_set_interrupts_enabled(intr); } /* @@ -531,9 +512,17 @@ ASSERT_IPL(SPL0); void fpextovrflt(void) { - register thread_t thr_act = current_thread(); - register pcb_t pcb; - register struct i386_fpsave_state *ifps; + thread_t thr_act = current_thread(); + pcb_t pcb; + struct x86_fpsave_state *ifps; + boolean_t intr; + + intr = ml_set_interrupts_enabled(FALSE); + + if (get_interrupt_level()) + panic("FPU segment overrun exception at interrupt context\n"); + if (current_task() == kernel_task) + panic("FPU segment overrun exception in kernel thread context\n"); /* * This is a non-recoverable error. @@ -541,8 +530,8 @@ fpextovrflt(void) */ pcb = thr_act->machine.pcb; simple_lock(&pcb->lock); - ifps = pcb->ims.ifps; - pcb->ims.ifps = 0; + ifps = pcb->ifps; + pcb->ifps = 0; simple_unlock(&pcb->lock); /* @@ -556,6 +545,8 @@ fpextovrflt(void) */ clear_fpu(); + (void)ml_set_interrupts_enabled(intr); + if (ifps) zfree(ifps_zone, ifps); @@ -573,22 +564,33 @@ fpextovrflt(void) void fpexterrflt(void) { - register thread_t thr_act = current_thread(); + thread_t thr_act = current_thread(); + struct x86_fpsave_state *ifps = thr_act->machine.pcb->ifps; + boolean_t intr; + + intr = ml_set_interrupts_enabled(FALSE); + + if (get_interrupt_level()) + panic("FPU error exception at interrupt context\n"); + if (current_task() == kernel_task) + panic("FPU error exception in kernel thread context\n"); -ASSERT_IPL(SPL0); /* * Save the FPU state and turn off the FPU. */ fp_save(thr_act); + (void)ml_set_interrupts_enabled(intr); + /* * Raise FPU exception. - * Locking not needed on pcb->ims.ifps, + * Locking not needed on pcb->ifps, * since thread is running. */ i386_exception(EXC_ARITHMETIC, EXC_I386_EXTERR, - thr_act->machine.pcb->ims.ifps->fp_save_state.fp_status); + ifps->fx_save_state.fx_status); + /*NOTREACHED*/ } @@ -599,22 +601,30 @@ ASSERT_IPL(SPL0); * . if called from fpu_get_state, pcb already locked. * . 
if called from fpnoextflt or fp_intr, we are single-cpu * . otherwise, thread is running. + * N.B.: Must be called with interrupts disabled */ + void fp_save( thread_t thr_act) { - register pcb_t pcb = thr_act->machine.pcb; - register struct i386_fpsave_state *ifps = pcb->ims.ifps; + pcb_t pcb = thr_act->machine.pcb; + struct x86_fpsave_state *ifps = pcb->ifps; + if (ifps != 0 && !ifps->fp_valid) { - /* registers are in FPU */ - ifps->fp_valid = TRUE; - ifps->fp_save_flavor = FP_387; - if (FXSAFE()) { - fxsave(&ifps->fx_save_state); // save the SSE2/Fp state in addition is enabled - ifps->fp_save_flavor = FP_FXSR; - } - fnsave(&ifps->fp_save_state); // also update the old save area for now... + assert((get_cr0() & CR0_TS) == 0); + /* registers are in FPU */ + ifps->fp_valid = TRUE; + + if (!thread_is_64bit(thr_act)) { + /* save the compatibility/legacy mode XMM+x87 state */ + fxsave(&ifps->fx_save_state); + ifps->fp_save_layout = FXSAVE32; + } + else { + fxsave64(&ifps->fx_save_state); + ifps->fp_save_layout = FXSAVE64; + } } } @@ -628,76 +638,33 @@ void fp_load( thread_t thr_act) { - register pcb_t pcb = thr_act->machine.pcb; - register struct i386_fpsave_state *ifps; - -ASSERT_IPL(SPL0); - ifps = pcb->ims.ifps; - if (ifps == 0) { - ifps = (struct i386_fpsave_state *) zalloc(ifps_zone); - assert(ALIGNED(ifps,16)); - bzero((char *)ifps, sizeof *ifps); - pcb->ims.ifps = ifps; - fpinit(); -#if 1 -/* - * I'm not sure this is needed. Does the fpu regenerate the interrupt in - * frstor or not? Without this code we may miss some exceptions, with it - * we might send too many exceptions. - */ - } else if (ifps->fp_valid == 2) { - /* delayed exception pending */ - - ifps->fp_valid = TRUE; - clear_fpu(); - /* - * Raise FPU exception. - * Locking not needed on pcb->ims.ifps, - * since thread is running. 
- */ - i386_exception(EXC_ARITHMETIC, - EXC_I386_EXTERR, - thr_act->machine.pcb->ims.ifps->fp_save_state.fp_status); - /*NOTREACHED*/ -#endif + pcb_t pcb = thr_act->machine.pcb; + struct x86_fpsave_state *ifps; + + ifps = pcb->ifps; + if (ifps == 0 || ifps->fp_valid == FALSE) { + if (ifps == 0) { + /* FIXME: This allocation mechanism should be revised + * for scenarios where interrupts are disabled. + */ + ifps = fp_state_alloc(); + pcb->ifps = ifps; + } + fpinit(); } else { - if (ifps->fp_save_flavor == FP_FXSR) fxrstor(&ifps->fx_save_state); - else frstor(ifps->fp_save_state); + assert(ifps->fp_save_layout == FXSAVE32 || ifps->fp_save_layout == FXSAVE64); + if (ifps->fp_save_layout == FXSAVE32) { + /* Restore the compatibility/legacy mode XMM+x87 state */ + fxrstor(&ifps->fx_save_state); + } + else if (ifps->fp_save_layout == FXSAVE64) { + fxrstor64(&ifps->fx_save_state); + } } ifps->fp_valid = FALSE; /* in FPU */ } -/* - * Allocate and initialize FP state for current thread. - * Don't load state. - * - * Locking not needed; always called on the current thread. - */ -void -fp_state_alloc(void) -{ - pcb_t pcb = current_thread()->machine.pcb; - struct i386_fpsave_state *ifps; - - ifps = (struct i386_fpsave_state *)zalloc(ifps_zone); - assert(ALIGNED(ifps,16)); - bzero((char *)ifps, sizeof *ifps); - pcb->ims.ifps = ifps; - - ifps->fp_valid = TRUE; - ifps->fp_save_state.fp_control = (0x037f - & ~(FPC_IM|FPC_ZM|FPC_OM|FPC_PC)) - | (FPC_PC_53|FPC_IC_AFF); - ifps->fp_save_state.fp_status = 0; - ifps->fp_save_state.fp_tag = 0xffff; /* all empty */ - ifps->fx_save_state.fx_control = ifps->fp_save_state.fp_control; - ifps->fx_save_state.fx_status = ifps->fp_save_state.fp_status; - ifps->fx_save_state.fx_tag = 0x00; - ifps->fx_save_state.fx_MXCSR = 0x1f80; - -} - /* * fpflush(thread_t) @@ -712,44 +679,53 @@ fpflush(__unused thread_t thr_act) /* not needed on MP x86s; fp not lazily evaluated */ } - /* - * Handle a coprocessor error interrupt on the AT386. 
- * This comes in on line 5 of the slave PIC at SPL1. + * SSE arithmetic exception handling code. + * Basically the same as the x87 exception handler with a different subtype */ void -fpintr(void) +fpSSEexterrflt(void) { - spl_t s; - thread_t thr_act = current_thread(); + thread_t thr_act = current_thread(); + struct x86_fpsave_state *ifps = thr_act->machine.pcb->ifps; + boolean_t intr; -ASSERT_IPL(SPL1); - /* - * Turn off the extended 'busy' line. - */ - outb(0xf0, 0); + intr = ml_set_interrupts_enabled(FALSE); + + if (get_interrupt_level()) + panic("SSE exception at interrupt context\n"); + if (current_task() == kernel_task) + panic("SSE exception in kernel thread context\n"); /* - * Save the FPU context to the thread using it. + * Save the FPU state and turn off the FPU. */ - clear_ts(); fp_save(thr_act); - fninit(); - clear_fpu(); + (void)ml_set_interrupts_enabled(intr); /* - * Since we are running on the interrupt stack, we must - * signal the thread to take the exception when we return - * to user mode. Use an AST to do this. - * - * Don`t set the thread`s AST field. If the thread is - * descheduled before it takes the AST, it will notice - * the FPU error when it reloads its FPU state. + * Raise FPU exception. + * Locking not needed on pcb->ifps, + * since thread is running. 
*/ - s = splsched(); - mp_disable_preemption(); - ast_on(AST_I386_FP); - mp_enable_preemption(); - splx(s); + assert(ifps->fp_save_layout == FXSAVE32 || ifps->fp_save_layout == FXSAVE64); + i386_exception(EXC_ARITHMETIC, + EXC_I386_SSEEXTERR, + ifps->fx_save_state.fx_status); + /*NOTREACHED*/ +} + + +void +fp_setvalid(boolean_t value) { + thread_t thr_act = current_thread(); + struct x86_fpsave_state *ifps = thr_act->machine.pcb->ifps; + + if (ifps) { + ifps->fp_valid = value; + + if (value == TRUE) + clear_fpu(); + } } diff --git a/osfmk/i386/fpu.h b/osfmk/i386/fpu.h index 618e2e3ce..9effc417f 100644 --- a/osfmk/i386/fpu.h +++ b/osfmk/i386/fpu.h @@ -58,12 +58,36 @@ * Macro definitions for routines to manipulate the * floating-point processor. */ - -#include +#include #include #include #include #include +#include + +extern int fp_kind; + +extern void init_fpu(void); +extern void fpu_module_init(void); +extern void fpu_free( + struct x86_fpsave_state * fps); +extern kern_return_t fpu_set_fxstate( + thread_t thr_act, + thread_state_t state); +extern kern_return_t fpu_get_fxstate( + thread_t thr_act, + thread_state_t state); +extern void fpu_dup_fxstate( + thread_t parent, + thread_t child); +extern void fpnoextflt(void); +extern void fpextovrflt(void); +extern void fpexterrflt(void); +extern void fpSSEexterrflt(void); +extern void fpflush(thread_t); +extern void fp_setvalid(boolean_t); +extern void fxsave64(struct x86_fx_save *); +extern void fxrstor64(struct x86_fx_save *); /* * FPU instructions. @@ -103,58 +127,45 @@ extern __inline__ unsigned short fnstsw(void) #define FXSAFE() (fp_kind == FP_FXSR) -#define fpu_load_context(pcb) + +static inline void clear_fpu(void) +{ + set_ts(); +} /* * Save thread`s FPU context. - * If only one CPU, we just set the task-switched bit, - * to keep the new thread from using the coprocessor. - * If multiple CPUs, we save the entire state. - * NOTE: in order to provide backwards compatible support in the kernel. 
When saving SSE2 state, we also save the - * FP state in it's old location. Otherwise fpu_get_state() and fpu_set_state() will stop working */ -#define fpu_save_context(thread) \ - { \ - register struct i386_fpsave_state *ifps; \ - ifps = (thread)->machine.pcb->ims.ifps; \ - if (ifps != 0 && !ifps->fp_valid) { \ - /* registers are in FPU - save to memory */ \ - ifps->fp_valid = TRUE; \ - ifps->fp_save_flavor = FP_387; \ - if (FXSAFE()) { \ - fxsave(&ifps->fx_save_state); \ - ifps->fp_save_flavor = FP_FXSR; \ - } \ - fnsave(&ifps->fp_save_state); \ - } \ - set_ts(); \ - } - - - -extern int fp_kind; -extern void init_fpu(void); -extern void fpu_module_init(void); -extern void fpu_free( - struct i386_fpsave_state * fps); -extern kern_return_t fpu_set_state( - thread_t thr_act, - struct i386_float_state * st); -extern kern_return_t fpu_get_state( - thread_t thr_act, - struct i386_float_state * st); -extern kern_return_t fpu_set_fxstate( - thread_t thr_act, - struct i386_float_state * st); -extern kern_return_t fpu_get_fxstate( - thread_t thr_act, - struct i386_float_state * st); -extern void fpnoextflt(void); -extern void fpextovrflt(void); -extern void fpexterrflt(void); -extern void fp_state_alloc(void); -extern void fpintr(void); -extern void fpflush(thread_t); +static inline void fpu_save_context(thread_t thread) +{ + struct x86_fpsave_state *ifps; + + assert(ml_get_interrupts_enabled() == FALSE); + ifps = (thread)->machine.pcb->ifps; + if (ifps != 0 && !ifps->fp_valid) { + /* Clear CR0.TS in preparation for the FP context save. In + * theory, this shouldn't be necessary since a live FPU should + * indicate that TS is clear. However, various routines + * (such as sendsig & sigreturn) manipulate TS directly. 
+ */ + clear_ts(); + /* registers are in FPU - save to memory */ + ifps->fp_valid = TRUE; + if (!thread_is_64bit(thread) || is_saved_state32(thread->machine.pcb->iss)) { + /* save the compatibility/legacy mode XMM+x87 state */ + fxsave(&ifps->fx_save_state); + ifps->fp_save_layout = FXSAVE32; + } + else { + /* Execute a brief jump to 64-bit mode to save the 64 + * bit state + */ + fxsave64(&ifps->fx_save_state); + ifps->fp_save_layout = FXSAVE64; + } + } + set_ts(); +} #endif /* _I386_FPU_H_ */ diff --git a/osfmk/i386/gdt.c b/osfmk/i386/gdt.c index df29cf3a9..500c34495 100644 --- a/osfmk/i386/gdt.c +++ b/osfmk/i386/gdt.c @@ -60,65 +60,61 @@ #include #include -#ifdef MACH_BSD -extern int trap_unix_syscall(void), trap_mach25_syscall(void), - trap_machdep_syscall(void), syscall(void); -#endif - -struct fake_descriptor gdt[GDTSZ] = { -/* 0x000 */ { 0, 0, 0, 0 }, /* always NULL */ -/* 0x008 */ { 0, - 0xfffff, - SZ_32|SZ_G, - ACC_P|ACC_PL_K|ACC_CODE_R - }, /* kernel code */ -/* 0x010 */ { 0, - 0xfffff, - SZ_32|SZ_G, - ACC_P|ACC_PL_K|ACC_DATA_W - }, /* kernel data */ -/* 0x018 */ { (unsigned int)ldt, - LDTSZ*sizeof(struct fake_descriptor)-1, - 0, - ACC_P|ACC_PL_K|ACC_LDT - }, /* local descriptor table */ -/* 0x020 */ { (unsigned int)&ktss, - sizeof(struct i386_tss)-1, - 0, - ACC_P|ACC_PL_K|ACC_TSS - }, /* TSS for this processor */ -#ifdef MACH_BSD -/* 0x28 */ { (unsigned int) &trap_unix_syscall, - KERNEL_CS, - 0, /* no parameters */ - ACC_P|ACC_PL_U|ACC_CALL_GATE - }, -/* 0x30 */ { (unsigned int) &trap_mach25_syscall, - KERNEL_CS, - 0, /* no parameters */ - ACC_P|ACC_PL_U|ACC_CALL_GATE - }, -/* 0x38 */ { (unsigned int) &trap_machdep_syscall, - KERNEL_CS, - 0, /* no parameters */ - ACC_P|ACC_PL_U|ACC_CALL_GATE - }, -#else -/* 0x028 */ { 0, 0, 0, 0 }, /* per-thread LDT */ -/* 0x030 */ { 0, 0, 0, 0 }, /* per-thread TSS for IO bitmap */ -/* 0x038 */ { 0, 0, 0, 0 }, -#endif -/* 0x040 */ { 0, 0, 0, 0 }, -/* 0x048 */ { (unsigned int)&cpu_data_master, - sizeof(cpu_data_t)-1, - 
SZ_32, - ACC_P|ACC_PL_K|ACC_DATA_W - }, /* per-CPU current thread address */ +struct fake_descriptor master_gdt[GDTSZ] __attribute__ ((aligned (4096))) = { + [SEL_TO_INDEX(KERNEL_CS)] { /* kernel code */ + 0, + 0xfffff, + SZ_32|SZ_G, + ACC_P|ACC_PL_K|ACC_CODE_R, + }, + [SEL_TO_INDEX(KERNEL_DS)] { /* kernel data */ + 0, + 0xfffff, + SZ_32|SZ_G, + ACC_P|ACC_PL_K|ACC_DATA_W + }, + [SEL_TO_INDEX(KERNEL_LDT)] { /* local descriptor table */ + (uint32_t) &master_ldt, + LDTSZ_MIN*sizeof(struct fake_descriptor)-1, + 0, + ACC_P|ACC_PL_K|ACC_LDT + }, /* The slot KERNEL_LDT_2 is reserved. */ + [SEL_TO_INDEX(KERNEL_TSS)] { /* TSS for this processor */ + (uint32_t) &master_ktss, + sizeof(struct i386_tss)-1, + 0, + ACC_P|ACC_PL_K|ACC_TSS + }, /* The slot KERNEL_TSS_2 is reserved. */ + [SEL_TO_INDEX(CPU_DATA_GS)] { /* per-CPU current thread address */ + (uint32_t) &cpu_data_master, + sizeof(cpu_data_t)-1, + SZ_32, + ACC_P|ACC_PL_K|ACC_DATA_W + }, + [SEL_TO_INDEX(USER_LDT)] { /* user local descriptor table */ + (uint32_t) &master_ldt, + LDTSZ_MIN*sizeof(struct fake_descriptor)-1, + 0, + ACC_P|ACC_PL_K|ACC_LDT + }, + [SEL_TO_INDEX(KERNEL64_CS)] { /* kernel 64-bit code */ + 0, + 0xfffff, + SZ_64|SZ_G, + ACC_P|ACC_PL_K|ACC_CODE_R + }, + [SEL_TO_INDEX(KERNEL64_SS)] { /* kernel 64-bit syscall stack */ + 0, + 0xfffff, + SZ_32|SZ_G, + ACC_P|ACC_PL_K|ACC_DATA_W + }, #if MACH_KDB -/* 0x050 */ { (unsigned int)&dbtss, - sizeof(struct i386_tss)-1, - 0, - ACC_P|ACC_PL_K|ACC_TSS - } /* TSS for this processor */ + [SEL_TO_INDEX(DEBUG_TSS)] { /* TSS for this processor */ + (uint32_t)&master_dbtss, + sizeof(struct i386_tss)-1, + 0, + ACC_P|ACC_PL_K|ACC_TSS + }, #endif /* MACH_KDB */ }; diff --git a/osfmk/i386/genassym.c b/osfmk/i386/genassym.c index 64dd377bc..56cffc191 100644 --- a/osfmk/i386/genassym.c +++ b/osfmk/i386/genassym.c @@ -61,21 +61,26 @@ #include #include #include +#include +#include #include #include #include -#include -#include +#include +#include #include -#include #include 
#include #include #include +#include +#include +#include +#include +#include +#include #include -#include #include -#include #include /* @@ -145,6 +150,8 @@ main( DECLARE("TASK_MACH_EXC_PORT", offsetof(task_t, exc_actions[EXC_MACH_SYSCALL].port)); + DECLARE("TASK_SYSCALLS_MACH", offsetof(struct task *, syscalls_mach)); + DECLARE("TASK_SYSCALLS_UNIX", offsetof(struct task *, syscalls_unix)); /* These fields are being added on demand */ DECLARE("ACT_MACH_EXC_PORT", @@ -152,50 +159,131 @@ main( DECLARE("ACT_TASK", offsetof(thread_t, task)); DECLARE("ACT_PCB", offsetof(thread_t, machine.pcb)); + DECLARE("ACT_SPF", offsetof(thread_t, machine.specFlags)); DECLARE("ACT_MAP", offsetof(thread_t, map)); + DECLARE("ACT_COPYIO_STATE", offsetof(thread_t, machine.copyio_state)); + DECLARE("ACT_PCB_ISS", offsetof(thread_t, machine.xxx_pcb.iss)); + DECLARE("ACT_PCB_IDS", offsetof(thread_t, machine.xxx_pcb.ids)); + + DECLARE("WINDOWS_CLEAN", WINDOWS_CLEAN); DECLARE("MAP_PMAP", offsetof(vm_map_t, pmap)); #define IKS ((size_t) (STACK_IKS(0))) - DECLARE("KSS_EBX", IKS + offsetof(struct i386_kernel_state *, k_ebx)); - DECLARE("KSS_ESP", IKS + offsetof(struct i386_kernel_state *, k_esp)); - DECLARE("KSS_EBP", IKS + offsetof(struct i386_kernel_state *, k_ebp)); - DECLARE("KSS_EDI", IKS + offsetof(struct i386_kernel_state *, k_edi)); - DECLARE("KSS_ESI", IKS + offsetof(struct i386_kernel_state *, k_esi)); - DECLARE("KSS_EIP", IKS + offsetof(struct i386_kernel_state *, k_eip)); + DECLARE("KSS_EBX", IKS + offsetof(struct x86_kernel_state32 *, k_ebx)); + DECLARE("KSS_ESP", IKS + offsetof(struct x86_kernel_state32 *, k_esp)); + DECLARE("KSS_EBP", IKS + offsetof(struct x86_kernel_state32 *, k_ebp)); + DECLARE("KSS_EDI", IKS + offsetof(struct x86_kernel_state32 *, k_edi)); + DECLARE("KSS_ESI", IKS + offsetof(struct x86_kernel_state32 *, k_esi)); + DECLARE("KSS_EIP", IKS + offsetof(struct x86_kernel_state32 *, k_eip)); - DECLARE("IKS_SIZE", sizeof(struct i386_kernel_state)); + 
DECLARE("IKS_SIZE", sizeof(struct x86_kernel_state32)); DECLARE("IEL_SIZE", sizeof(struct i386_exception_link)); - DECLARE("PCB_FPS", offsetof(pcb_t, ims.ifps)); + DECLARE("PCB_FPS", offsetof(pcb_t, ifps)); DECLARE("PCB_ISS", offsetof(pcb_t, iss)); - DECLARE("FP_VALID", offsetof(struct i386_fpsave_state *,fp_valid)); - DECLARE("FP_SAVE_STATE", - offsetof(struct i386_fpsave_state *, fp_save_state)); - - DECLARE("R_CS", offsetof(struct i386_saved_state *, cs)); - DECLARE("R_SS", offsetof(struct i386_saved_state *, ss)); - DECLARE("R_UESP", offsetof(struct i386_saved_state *, uesp)); - DECLARE("R_EBP", offsetof(struct i386_saved_state *, ebp)); - DECLARE("R_EAX", offsetof(struct i386_saved_state *, eax)); - DECLARE("R_EBX", offsetof(struct i386_saved_state *, ebx)); - DECLARE("R_ECX", offsetof(struct i386_saved_state *, ecx)); - DECLARE("R_EDX", offsetof(struct i386_saved_state *, edx)); - DECLARE("R_ESI", offsetof(struct i386_saved_state *, esi)); - DECLARE("R_EDI", offsetof(struct i386_saved_state *, edi)); - DECLARE("R_TRAPNO", offsetof(struct i386_saved_state *, trapno)); - DECLARE("R_ERR", offsetof(struct i386_saved_state *, err)); - DECLARE("R_EFLAGS", offsetof(struct i386_saved_state *, efl)); - DECLARE("R_EIP", offsetof(struct i386_saved_state *, eip)); - DECLARE("R_CR2", offsetof(struct i386_saved_state *, cr2)); - DECLARE("ISS_SIZE", sizeof (struct i386_saved_state)); - - DECLARE("I_ECX", offsetof(struct i386_interrupt_state *, ecx)); - DECLARE("I_EIP", offsetof(struct i386_interrupt_state *, eip)); - DECLARE("I_CS", offsetof(struct i386_interrupt_state *, cs)); - DECLARE("I_EFL", offsetof(struct i386_interrupt_state *, efl)); + DECLARE("DS_DR0", offsetof(struct x86_debug_state32 *, dr0)); + DECLARE("DS_DR1", offsetof(struct x86_debug_state32 *, dr1)); + DECLARE("DS_DR2", offsetof(struct x86_debug_state32 *, dr2)); + DECLARE("DS_DR3", offsetof(struct x86_debug_state32 *, dr3)); + DECLARE("DS_DR4", offsetof(struct x86_debug_state32 *, dr4)); + 
DECLARE("DS_DR5", offsetof(struct x86_debug_state32 *, dr5)); + DECLARE("DS_DR6", offsetof(struct x86_debug_state32 *, dr6)); + DECLARE("DS_DR7", offsetof(struct x86_debug_state32 *, dr7)); + + DECLARE("DS64_DR0", offsetof(struct x86_debug_state64 *, dr0)); + DECLARE("DS64_DR1", offsetof(struct x86_debug_state64 *, dr1)); + DECLARE("DS64_DR2", offsetof(struct x86_debug_state64 *, dr2)); + DECLARE("DS64_DR3", offsetof(struct x86_debug_state64 *, dr3)); + DECLARE("DS64_DR4", offsetof(struct x86_debug_state64 *, dr4)); + DECLARE("DS64_DR5", offsetof(struct x86_debug_state64 *, dr5)); + DECLARE("DS64_DR6", offsetof(struct x86_debug_state64 *, dr6)); + DECLARE("DS64_DR7", offsetof(struct x86_debug_state64 *, dr7)); + + DECLARE("FP_VALID", offsetof(struct x86_fpsave_state *,fp_valid)); + + DECLARE("SS_FLAVOR", offsetof(x86_saved_state_t *, flavor)); + DECLARE("SS_32", x86_SAVED_STATE32); + DECLARE("SS_64", x86_SAVED_STATE64); + +#define R_(x) offsetof(x86_saved_state_t *, ss_32.x) + DECLARE("R_CS", R_(cs)); + DECLARE("R_SS", R_(ss)); + DECLARE("R_DS", R_(ds)); + DECLARE("R_ES", R_(es)); + DECLARE("R_FS", R_(fs)); + DECLARE("R_GS", R_(gs)); + DECLARE("R_UESP", R_(uesp)); + DECLARE("R_EBP", R_(ebp)); + DECLARE("R_EAX", R_(eax)); + DECLARE("R_EBX", R_(ebx)); + DECLARE("R_ECX", R_(ecx)); + DECLARE("R_EDX", R_(edx)); + DECLARE("R_ESI", R_(esi)); + DECLARE("R_EDI", R_(edi)); + DECLARE("R_TRAPNO", R_(trapno)); + DECLARE("R_ERR", R_(err)); + DECLARE("R_EFLAGS", R_(efl)); + DECLARE("R_EIP", R_(eip)); + DECLARE("R_CR2", R_(cr2)); + DECLARE("ISS32_SIZE", sizeof (x86_saved_state32_t)); + +#define R64_(x) offsetof(x86_saved_state_t *, ss_64.x) + DECLARE("R64_FS", R64_(fs)); + DECLARE("R64_GS", R64_(gs)); + DECLARE("R64_R8", R64_(r8)); + DECLARE("R64_R9", R64_(r9)); + DECLARE("R64_R10", R64_(r10)); + DECLARE("R64_R11", R64_(r11)); + DECLARE("R64_R12", R64_(r12)); + DECLARE("R64_R13", R64_(r13)); + DECLARE("R64_R14", R64_(r14)); + DECLARE("R64_R15", R64_(r15)); + DECLARE("R64_RBP", 
R64_(rbp)); + DECLARE("R64_RAX", R64_(rax)); + DECLARE("R64_RBX", R64_(rbx)); + DECLARE("R64_RCX", R64_(rcx)); + DECLARE("R64_RDX", R64_(rdx)); + DECLARE("R64_RSI", R64_(rsi)); + DECLARE("R64_RDI", R64_(rdi)); + DECLARE("R64_V_ARG6", R64_(v_arg6)); + DECLARE("R64_V_ARG7", R64_(v_arg7)); + DECLARE("R64_V_ARG8", R64_(v_arg8)); + DECLARE("R64_CS", R64_(isf.cs)); + DECLARE("R64_SS", R64_(isf.ss)); + DECLARE("R64_RSP", R64_(isf.rsp)); + DECLARE("R64_TRAPNO", R64_(isf.trapno)); + DECLARE("R64_TRAPFN", R64_(isf.trapfn)); + DECLARE("R64_ERR", R64_(isf.err)); + DECLARE("R64_RFLAGS", R64_(isf.rflags)); + DECLARE("R64_RIP", R64_(isf.rip)); + DECLARE("R64_CR2", R64_(cr2)); + DECLARE("ISS64_OFFSET", R64_(isf)); + DECLARE("ISS64_SIZE", sizeof (x86_saved_state64_t)); + +#define ISF64_(x) offsetof(x86_64_intr_stack_frame_t *, x) + DECLARE("ISF64_TRAPNO", ISF64_(trapno)); + DECLARE("ISF64_TRAPFN", ISF64_(trapfn)); + DECLARE("ISF64_ERR", ISF64_(err)); + DECLARE("ISF64_RIP", ISF64_(rip)); + DECLARE("ISF64_CS", ISF64_(cs)); + DECLARE("ISF64_RFLAGS", ISF64_(rflags)); + DECLARE("ISF64_RSP", ISF64_(rsp)); + DECLARE("ISF64_SS", ISF64_(ss)); + DECLARE("ISF64_SIZE", sizeof(x86_64_intr_stack_frame_t)); + + DECLARE("ISC32_OFFSET", offsetof(x86_saved_state_compat32_t *, isf64)); +#define ISC32_(x) offsetof(x86_saved_state_compat32_t *, isf64.x) + DECLARE("ISC32_TRAPNO", ISC32_(trapno)); + DECLARE("ISC32_TRAPFN", ISC32_(trapfn)); + DECLARE("ISC32_ERR", ISC32_(err)); + DECLARE("ISC32_RIP", ISC32_(rip)); + DECLARE("ISC32_CS", ISC32_(cs)); + DECLARE("ISC32_RFLAGS", ISC32_(rflags)); + DECLARE("ISC32_RSP", ISC32_(rsp)); + DECLARE("ISC32_SS", ISC32_(ss)); DECLARE("NBPG", I386_PGBYTES); DECLARE("PAGE_SIZE", I386_PGBYTES); @@ -208,6 +296,7 @@ main( DECLARE("KERNELBASE", VM_MIN_KERNEL_ADDRESS); DECLARE("LINEAR_KERNELBASE", LINEAR_KERNEL_ADDRESS); DECLARE("KERNEL_STACK_SIZE", KERNEL_STACK_SIZE); + DECLARE("KERNEL_UBER_BASE_HI32", KERNEL_UBER_BASE_HI32); DECLARE("COMM_PAGE_BASE_ADDR", 
_COMM_PAGE_BASE_ADDRESS); @@ -220,7 +309,6 @@ main( DECLARE("PTE_PS", INTEL_PTE_PS); DECLARE("PTE_U", INTEL_PTE_USER); DECLARE("PTE_INVALID", ~INTEL_PTE_VALID); - DECLARE("CR4_PAE", CR4_PAE); DECLARE("NPGPTD", NPGPTD); DECLARE("IDTSZ", IDTSZ); @@ -231,19 +319,27 @@ main( DECLARE("KERNEL_DS", KERNEL_DS); DECLARE("USER_CS", USER_CS); DECLARE("USER_DS", USER_DS); + DECLARE("KERNEL64_CS", KERNEL64_CS); + DECLARE("USER64_CS", USER64_CS); DECLARE("KERNEL_TSS", KERNEL_TSS); DECLARE("KERNEL_LDT", KERNEL_LDT); + DECLARE("DF_TSS", DF_TSS); + DECLARE("MC_TSS", MC_TSS); #if MACH_KDB DECLARE("DEBUG_TSS", DEBUG_TSS); #endif /* MACH_KDB */ DECLARE("CPU_DATA_GS", CPU_DATA_GS); + DECLARE("SYSENTER_CS", SYSENTER_CS); + DECLARE("SYSENTER_TF_CS",SYSENTER_TF_CS); + DECLARE("SYSENTER_DS", SYSENTER_DS); + DECLARE("SYSCALL_CS", SYSCALL_CS); + DECLARE("USER_WINDOW_SEL", USER_WINDOW_SEL); + DECLARE("PHYS_WINDOW_SEL", PHYS_WINDOW_SEL); DECLARE("CPU_THIS", offsetof(cpu_data_t *, cpu_this)); DECLARE("CPU_ACTIVE_THREAD", offsetof(cpu_data_t *, cpu_active_thread)); - DECLARE("CPU_ACTIVE_KLOADED", - offsetof(cpu_data_t *, cpu_active_kloaded)); DECLARE("CPU_ACTIVE_STACK", offsetof(cpu_data_t *, cpu_active_stack)); DECLARE("CPU_KERNEL_STACK", @@ -268,10 +364,63 @@ main( offsetof(cpu_data_t *,cpu_pending_ast)); DECLARE("CPU_DESC_TABLEP", offsetof(cpu_data_t *,cpu_desc_tablep)); + DECLARE("CPU_DESC_INDEX", + offsetof(cpu_data_t *,cpu_desc_index)); + DECLARE("CDI_GDT", + offsetof(cpu_desc_index_t *,cdi_gdt)); + DECLARE("CDI_IDT", + offsetof(cpu_desc_index_t *,cdi_idt)); DECLARE("CPU_PROCESSOR", offsetof(cpu_data_t *,cpu_processor)); - DECLARE("CPU_RTC_NANOTIME", - offsetof(cpu_data_t *,cpu_rtc_nanotime)); + DECLARE("CPU_INT_STATE", + offsetof(cpu_data_t *, cpu_int_state)); + + DECLARE("CPU_HI_ISS", + offsetof(cpu_data_t *, cpu_hi_iss)); + DECLARE("CPU_TASK_CR3", + offsetof(cpu_data_t *, cpu_task_cr3)); + DECLARE("CPU_ACTIVE_CR3", + offsetof(cpu_data_t *, cpu_active_cr3)); + DECLARE("CPU_KERNEL_CR3", + 
offsetof(cpu_data_t *, cpu_kernel_cr3)); + + DECLARE("CPU_IS64BIT", + offsetof(cpu_data_t *, cpu_is64bit)); + DECLARE("CPU_TASK_MAP", + offsetof(cpu_data_t *, cpu_task_map)); + DECLARE("TASK_MAP_32BIT", TASK_MAP_32BIT); + DECLARE("TASK_MAP_64BIT", TASK_MAP_64BIT); + DECLARE("TASK_MAP_64BIT_SHARED", TASK_MAP_64BIT_SHARED); + DECLARE("CPU_UBER_USER_GS_BASE", + offsetof(cpu_data_t *, cpu_uber.cu_user_gs_base)); + DECLARE("CPU_UBER_ISF", + offsetof(cpu_data_t *, cpu_uber.cu_isf)); + DECLARE("CPU_UBER_TMP", + offsetof(cpu_data_t *, cpu_uber.cu_tmp)); + DECLARE("CPU_DR7", + offsetof(cpu_data_t *, cpu_dr7)); + + DECLARE("hwIntCnt", offsetof(cpu_data_t *,cpu_hwIntCnt)); + + DECLARE("enaExpTrace", enaExpTrace); + DECLARE("enaExpTraceb", enaExpTraceb); + DECLARE("enaUsrFCall", enaUsrFCall); + DECLARE("enaUsrFCallb", enaUsrFCallb); + DECLARE("enaUsrPhyMp", enaUsrPhyMp); + DECLARE("enaUsrPhyMpb", enaUsrPhyMpb); + DECLARE("enaDiagSCs", enaDiagSCs); + DECLARE("enaDiagSCsb", enaDiagSCsb); + DECLARE("enaDiagEM", enaDiagEM); + DECLARE("enaDiagEMb", enaDiagEMb); + DECLARE("enaNotifyEM", enaNotifyEM); + DECLARE("enaNotifyEMb", enaNotifyEMb); + DECLARE("dgLock", offsetof(struct diagWork *, dgLock)); + DECLARE("dgFlags", offsetof(struct diagWork *, dgFlags)); + DECLARE("dgMisc1", offsetof(struct diagWork *, dgMisc1)); + DECLARE("dgMisc2", offsetof(struct diagWork *, dgMisc2)); + DECLARE("dgMisc3", offsetof(struct diagWork *, dgMisc3)); + DECLARE("dgMisc4", offsetof(struct diagWork *, dgMisc4)); + DECLARE("dgMisc5", offsetof(struct diagWork *, dgMisc5)); DECLARE("INTEL_PTE_KERNEL", INTEL_PTE_VALID|INTEL_PTE_WRITE); DECLARE("PTDPTDI", PTDPTDI); @@ -279,6 +428,8 @@ main( DECLARE("PDESIZE", PDESIZE); DECLARE("PTESIZE", PTESIZE); DECLARE("APTDPTDI", APTDPTDI); + DECLARE("HIGH_MEM_BASE", HIGH_MEM_BASE); + DECLARE("HIGH_IDT_BASE", pmap_index_to_virt(HIGH_FIXED_IDT)); DECLARE("KERNELBASEPDE", (LINEAR_KERNEL_ADDRESS >> PDESHIFT) * @@ -294,6 +445,7 @@ main( DECLARE("K_TRAP_GATE", 
ACC_P|ACC_PL_K|ACC_TRAP_GATE); DECLARE("U_TRAP_GATE", ACC_P|ACC_PL_U|ACC_TRAP_GATE); DECLARE("K_INTR_GATE", ACC_P|ACC_PL_K|ACC_INTR_GATE); + DECLARE("U_INTR_GATE", ACC_P|ACC_PL_U|ACC_INTR_GATE); DECLARE("K_TSS", ACC_P|ACC_PL_K|ACC_TSS); /* @@ -302,33 +454,22 @@ main( DECLARE("USL_INTERLOCK", offsetof(usimple_lock_t, interlock)); DECLARE("INTSTACK_SIZE", INTSTACK_SIZE); - DECLARE("MP_GDT", offsetof(struct mp_desc_table *, gdt[0])); - DECLARE("MP_IDT", offsetof(struct mp_desc_table *, idt[0])); DECLARE("TIMER_LOW", offsetof(struct timer *, low_bits)); DECLARE("TIMER_HIGH", offsetof(struct timer *, high_bits)); DECLARE("TIMER_HIGHCHK", offsetof(struct timer *, high_bits_check)); - DECLARE("KADDR", offsetof(struct KernelBootArgs *, kaddr)); - DECLARE("KSIZE", offsetof(struct KernelBootArgs *, ksize)); - - DECLARE("NANOTIME_BASE_TSC", - offsetof(commpage_nanotime_t*, nt_base_tsc)); - DECLARE("NANOTIME_BASE_NS", - offsetof(commpage_nanotime_t*, nt_base_ns)); - DECLARE("NANOTIME_SCALE", - offsetof(commpage_nanotime_t*, nt_scale)); - DECLARE("NANOTIME_SHIFT", - offsetof(commpage_nanotime_t*, nt_shift)); - DECLARE("NANOTIME_CHECK_TSC", - offsetof(commpage_nanotime_t*, nt_check_tsc)); - - DECLARE("RTN_TSC", - offsetof(rtc_nanotime_t *, rnt_tsc)); - DECLARE("RTN_NANOS", - offsetof(rtc_nanotime_t *, rnt_nanos)); - DECLARE("RTN_SCALE", - offsetof(rtc_nanotime_t *, rnt_scale)); - DECLARE("RTN_SHIFT", - offsetof(rtc_nanotime_t *, rnt_shift)); + DECLARE("KADDR", offsetof(struct boot_args *, kaddr)); + DECLARE("KSIZE", offsetof(struct boot_args *, ksize)); + DECLARE("MEMORYMAP", offsetof(struct boot_args *, MemoryMap)); + DECLARE("DEVICETREEP", offsetof(struct boot_args *, deviceTreeP)); + + DECLARE("RNT_TSC_BASE", + offsetof(rtc_nanotime_t *, tsc_base)); + DECLARE("RNT_NS_BASE", + offsetof(rtc_nanotime_t *, ns_base)); + DECLARE("RNT_SCALE", + offsetof(rtc_nanotime_t *, scale)); + DECLARE("RNT_SHIFT", + offsetof(rtc_nanotime_t *, shift)); /* values from kern/timer.h */ 
DECLARE("TIMER_LOW", @@ -349,6 +490,22 @@ main( DECLARE("USER_TIMER", offsetof(struct thread *, user_timer)); + DECLARE("OnProc", OnProc); + + + DECLARE("GCAP_ID", offsetof(hpetReg_t *, GCAP_ID)); + DECLARE("GEN_CONF", offsetof(hpetReg_t *, GEN_CONF)); + DECLARE("GINTR_STA", offsetof(hpetReg_t *, GINTR_STA)); + DECLARE("MAIN_CNT", offsetof(hpetReg_t *, MAIN_CNT)); + DECLARE("TIM0_CONF", offsetof(hpetReg_t *, TIM0_CONF)); + DECLARE("TIM_CONF", TIM_CONF); + DECLARE("Tn_INT_ENB_CNF", Tn_INT_ENB_CNF); + DECLARE("TIM0_COMP", offsetof(hpetReg_t *, TIM0_COMP)); + DECLARE("TIM_COMP", TIM_COMP); + DECLARE("TIM1_CONF", offsetof(hpetReg_t *, TIM1_CONF)); + DECLARE("TIM1_COMP", offsetof(hpetReg_t *, TIM1_COMP)); + DECLARE("TIM2_CONF", offsetof(hpetReg_t *, TIM2_CONF)); + DECLARE("TIM2_COMP", offsetof(hpetReg_t *, TIM2_COMP)); + return (0); } - diff --git a/osfmk/i386/hibernate_i386.c b/osfmk/i386/hibernate_i386.c index d243fcd56..d8f14ea46 100644 --- a/osfmk/i386/hibernate_i386.c +++ b/osfmk/i386/hibernate_i386.c @@ -31,74 +31,90 @@ #include #include #include -#define KERNEL + +#include #include #include +#include "i386_lowmem.h" -/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ - -/* This assumes that - * - we never will want to read or write memory below the start of kernel text - * - kernel text and data isn't included in pmap memory regions - */ +#define MAX_BANKS 32 -extern void *sectTEXTB; -extern char *first_avail; +/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ hibernate_page_list_t * hibernate_page_list_allocate(void) { - vm_offset_t base; + ppnum_t base, num; vm_size_t size; - uint32_t bank; + uint32_t bank, num_banks; uint32_t pages, page_count; hibernate_page_list_t * list; hibernate_bitmap_t * bitmap; - pmap_memory_region_t * regions; - pmap_memory_region_t * rp; - uint32_t num_regions, num_alloc_regions; - - page_count = 0; - - /* Make a list of the maximum number of regions needed */ 
- num_alloc_regions = 1 + pmap_memory_region_count; - - /* Allocate our own list of memory regions so we can sort them in order. */ - regions = (pmap_memory_region_t *)kalloc(sizeof(pmap_memory_region_t) * num_alloc_regions); - if (!regions) - return (0); - /* Fill in the actual regions we will be returning. */ - rp = regions; + EfiMemoryRange * mptr; + uint32_t mcount, msize, i; + hibernate_bitmap_t dram_ranges[MAX_BANKS]; + boot_args * args = (boot_args *) PE_state.bootArgs; - /* XXX should check for non-volatile memory region below kernel space. */ - /* Kernel region is first. */ - base = (vm_offset_t)(sectTEXTB) & 0x3FFFFFFF; - rp->base = atop_32(base); - rp->end = atop_32((vm_offset_t)first_avail) - 1; - rp->alloc = 0; - num_regions = 1; + mptr = args->MemoryMap; + if (args->MemoryMapDescriptorSize == 0) + panic("Invalid memory map descriptor size"); + msize = args->MemoryMapDescriptorSize; + mcount = args->MemoryMapSize / msize; - /* Remaining memory regions. Consolidate adjacent regions. 
*/ - for (bank = 0; bank < (uint32_t) pmap_memory_region_count; bank++) + num_banks = 0; + for (i = 0; i < mcount; i++, mptr = (EfiMemoryRange *)(((vm_offset_t)mptr) + msize)) { - if ((rp->end + 1) == pmap_memory_regions[bank].base) { - rp->end = pmap_memory_regions[bank].end; - } else { - ++rp; - ++num_regions; - rp->base = pmap_memory_regions[bank].base; - rp->end = pmap_memory_regions[bank].end; - rp->alloc = 0; - } + base = (ppnum_t) (mptr->PhysicalStart >> I386_PGSHIFT); + num = (ppnum_t) mptr->NumberOfPages; + if (!num) + continue; + + switch (mptr->Type) + { + // any kind of dram + case kEfiLoaderCode: + case kEfiLoaderData: + case kEfiBootServicesCode: + case kEfiBootServicesData: + case kEfiConventionalMemory: + case kEfiACPIReclaimMemory: + case kEfiACPIMemoryNVS: + case kEfiPalCode: + + if (!num_banks || (base != (1 + dram_ranges[num_banks - 1].last_page))) + { + num_banks++; + if (num_banks >= MAX_BANKS) + break; + dram_ranges[num_banks - 1].first_page = base; + } + dram_ranges[num_banks - 1].last_page = base + num - 1; + break; + + // runtime services will be restarted, so no save + case kEfiRuntimeServicesCode: + case kEfiRuntimeServicesData: + // non dram + case kEfiReservedMemoryType: + case kEfiUnusableMemory: + case kEfiMemoryMappedIO: + case kEfiMemoryMappedIOPortSpace: + default: + break; + } } - /* Size the hibernation bitmap */ + if (num_banks >= MAX_BANKS) + return (NULL); + + // size the hibernation bitmap + size = sizeof(hibernate_page_list_t); page_count = 0; - for (bank = 0, rp = regions; bank < num_regions; bank++, rp++) { - pages = rp->end + 1 - rp->base; + for (bank = 0; bank < num_banks; bank++) { + pages = dram_ranges[bank].last_page + 1 - dram_ranges[bank].first_page; page_count += pages; size += sizeof(hibernate_bitmap_t) + ((pages + 31) >> 5) * sizeof(uint32_t); } @@ -109,62 +125,75 @@ hibernate_page_list_allocate(void) list->list_size = size; list->page_count = page_count; - list->bank_count = num_regions; + list->bank_count = 
num_banks; + + // convert to hibernation bitmap. - /* Convert to hibernation bitmap. */ - /* This assumes that ranges are in order and do not overlap. */ bitmap = &list->bank_bitmap[0]; - for (bank = 0, rp = regions; bank < num_regions; bank++, rp++) { - bitmap->first_page = rp->base; - bitmap->last_page = rp->end; + for (bank = 0; bank < num_banks; bank++) + { + bitmap->first_page = dram_ranges[bank].first_page; + bitmap->last_page = dram_ranges[bank].last_page; bitmap->bitmapwords = (bitmap->last_page + 1 - bitmap->first_page + 31) >> 5; - kprintf("HIB: Bank %d: 0x%x end 0x%x\n", bank, - ptoa_32(bitmap->first_page), - ptoa_32(bitmap->last_page)); + kprintf("hib bank[%d]: 0x%x000 end 0x%xfff\n", bank, + bitmap->first_page, + bitmap->last_page); bitmap = (hibernate_bitmap_t *) &bitmap->bitmap[bitmap->bitmapwords]; } - kfree((void *)regions, sizeof(pmap_memory_region_t) * num_alloc_regions); return (list); } +// mark pages not to be saved, but available for scratch usage during restore + +void +hibernate_page_list_setall_machine( __unused hibernate_page_list_t * page_list, + __unused hibernate_page_list_t * page_list_wired, + __unused uint32_t * pagesOut) +{ +} + +// mark pages not to be saved and not for scratch usage during restore void -hibernate_page_list_setall_machine(hibernate_page_list_t * page_list, - hibernate_page_list_t * page_list_wired, - uint32_t * pagesOut) +hibernate_page_list_set_volatile( hibernate_page_list_t * page_list, + hibernate_page_list_t * page_list_wired, + uint32_t * pagesOut) { - KernelBootArgs_t * bootArgs = (KernelBootArgs_t *)PE_state.bootArgs; - MemoryRange * mptr; - uint32_t bank; - uint32_t page, count; - - for (bank = 0, mptr = bootArgs->memoryMap; bank < bootArgs->memoryMapCount; bank++, mptr++) { - - if (kMemoryRangeNVS != mptr->type) continue; - kprintf("Base NVS region 0x%x + 0x%x\n", (vm_offset_t)mptr->base, (vm_size_t)mptr->length); - /* Round to page size. Hopefully this does not overlap any reserved areas. 
*/ - page = atop_32(trunc_page((vm_offset_t)mptr->base)); - count = atop_32(round_page((vm_offset_t)mptr->base + (vm_size_t)mptr->length)) - page; - kprintf("Rounded NVS region 0x%x size 0x%x\n", page, count); - - hibernate_set_page_state(page_list, page_list_wired, page, count, 1); - pagesOut -= count; + boot_args * args = (boot_args *) PE_state.bootArgs; + + hibernate_set_page_state(page_list, page_list_wired, + I386_HIB_PAGETABLE, I386_HIB_PAGETABLE_COUNT, + kIOHibernatePageStateFree); + *pagesOut -= I386_HIB_PAGETABLE_COUNT; + + if (args->efiRuntimeServicesPageStart) + { + hibernate_set_page_state(page_list, page_list_wired, + args->efiRuntimeServicesPageStart, args->efiRuntimeServicesPageCount, + kIOHibernatePageStateFree); + *pagesOut -= args->efiRuntimeServicesPageCount; } } kern_return_t hibernate_processor_setup(IOHibernateImageHeader * header) { - current_cpu_datap()->cpu_hibernate = 1; + boot_args * args = (boot_args *) PE_state.bootArgs; + + cpu_datap(0)->cpu_hibernate = 1; header->processorFlags = 0; + + header->runtimePages = args->efiRuntimeServicesPageStart; + header->runtimePageCount = args->efiRuntimeServicesPageCount; + return (KERN_SUCCESS); } void hibernate_vm_lock(void) { - if (FALSE /* getPerProc()->hibernate */) + if (current_cpu_datap()->cpu_hibernate) { vm_page_lock_queues(); mutex_lock(&vm_page_queue_free_lock); @@ -174,7 +203,7 @@ hibernate_vm_lock(void) void hibernate_vm_unlock(void) { - if (FALSE /* getPerProc()->hibernate */) + if (current_cpu_datap()->cpu_hibernate) { mutex_unlock(&vm_page_queue_free_lock); vm_page_unlock_queues(); diff --git a/osfmk/i386/hibernate_restore.s b/osfmk/i386/hibernate_restore.s index c01f23c50..d718870c8 100644 --- a/osfmk/i386/hibernate_restore.s +++ b/osfmk/i386/hibernate_restore.s @@ -42,34 +42,12 @@ needs to be careful to only touch memory also in the "__HIB" section. 
.long address ;\ .word segment -#define KVTOPHYS (-KERNELBASE) -#define KVTOLINEAR LINEAR_KERNELBASE - -#define PA(addr) ((addr)+KVTOPHYS) -#define VA(addr) ((addr)-KVTOPHYS) - /* Location of temporary page tables */ -#define HPTD 0x80000 - -#define KERNEL_MAP_SIZE ( 4 * 1024 * 1024) +#define HPTD (0x13000) +#define HPDPT (0x17000) -/* - * fillkpt - * eax = page frame address - * ebx = index into page table - * ecx = how many pages to map - * base = base address of page dir/table - * prot = protection bits - */ -#define fillkpt(base, prot) \ - shll $2,%ebx ; \ - addl base,%ebx ; \ - orl $(PTE_V), %eax ; \ - orl prot,%eax ; \ -1: movl %eax,(%ebx) ; \ - addl $(PAGE_SIZE),%eax ; /* increment physical address */ \ - addl $4,%ebx ; /* next pte */ \ - loop 1b +#define LAST_PAGE (0xFFE00000) +#define LAST_PAGE_PDE (0x7ff) /* * fillpse @@ -80,26 +58,48 @@ needs to be careful to only touch memory also in the "__HIB" section. * prot = protection bits */ #define fillpse(base, prot) \ - shll $2,%ebx ; \ + shll $3,%ebx ; \ addl base,%ebx ; \ - orl $(PTE_V|PTE_PS), %eax ; \ + orl $(PTE_V|PTE_PS|0x60), %eax ; \ orl prot,%eax ; \ -1: movl %eax,(%ebx) ; \ - addl $(1 << PDESHIFT),%eax ; /* increment physical address 4Mb */ \ + xorl %edx, %edx ; \ +1: movl %eax,(%ebx) ; /* low 32b */ \ + addl $4,%ebx ; \ + movl %edx,(%ebx) ; /* high 32b */ \ + addl $(1 << PDESHIFT),%eax ; /* increment physical address 2Mb */ \ addl $4,%ebx ; /* next entry */ \ loop 1b -/* - * fillkptphys(base, prot) - * eax = physical address - * ecx = how many pages to map - * base = base of page table - * prot = protection bits + + +/* Segment Descriptor + * + * 31 24 19 16 7 0 + * ------------------------------------------------------------ + * | | |B| |A| | | |1|0|E|W|A| | + * | BASE 31..24 |G|/|0|V| LIMIT |P|DPL| TYPE | BASE 23:16 | + * | | |D| |L| 19..16| | |1|1|C|R|A| | + * ------------------------------------------------------------ + * | | | + * | BASE 15..0 | LIMIT 15..0 | + * | | | + * 
------------------------------------------------------------ */ -#define fillkptphys(base, prot) \ - movl %eax, %ebx ; \ - shrl $(PAGE_SHIFT), %ebx ; \ - fillkpt(base, prot) + + .align ALIGN +ENTRY(hib_gdt) + .word 0, 0 /* 0x0 : null */ + .byte 0, 0, 0, 0 + + .word 0xffff, 0x0000 /* 0x8 : code */ + .byte 0, 0x9e, 0xcf, 0 + + .word 0xffff, 0x0000 /* 0x10 : data */ + .byte 0, 0x92, 0xcf, 0 + +ENTRY(hib_gdtr) + .word 24 /* limit (8*3 segs) */ + .long EXT(hib_gdt) /* * Hibernation code restarts here. Steal some pages from 0x10000 @@ -120,38 +120,69 @@ LEXT(hibernate_machine_entrypoint) cli mov %eax, %edi - + POSTCODE(0x1) - /* Map physical memory from zero to 0xC0000000 */ + /* Map physical memory from zero to LAST_PAGE */ xorl %eax, %eax xorl %ebx, %ebx - movl $(KPTDI), %ecx + movl $(LAST_PAGE_PDE), %ecx fillpse( $(HPTD), $(PTE_W) ) - /* Map 0 again at 0xC0000000 */ - xorl %eax, %eax - movl $(KPTDI), %ebx - movl $(KERNEL_MAP_SIZE >> PDESHIFT), %ecx - fillpse( $(HPTD), $(PTE_W) ) - - movl $(HPTD), %eax + movl $(HPDPT), %ebx + movl $(HPTD), %eax + orl $(PTE_V), %eax + + xorl %edx, %edx ; \ + + movl %eax,(%ebx) ; /* low 32b */ \ + addl $4,%ebx ; \ + movl %edx,(%ebx) ; /* high 32b */ \ + addl $4,%ebx ; \ + addl $(1 << 12),%eax ; /* increment physical address 1Gb */ \ + + movl %eax,(%ebx) ; /* low 32b */ \ + addl $4,%ebx ; \ + movl %edx,(%ebx) ; /* high 32b */ \ + addl $4,%ebx ; \ + addl $(1 << 12),%eax ; /* increment physical address 1Gb */ \ + + movl %eax,(%ebx) ; /* low 32b */ \ + addl $4,%ebx ; \ + movl %edx,(%ebx) ; /* high 32b */ \ + addl $4,%ebx ; \ + addl $(1 << 12),%eax ; /* increment physical address 1Gb */ \ + + movl %eax,(%ebx) ; /* low 32b */ + addl $4,%ebx ; + movl %edx,(%ebx) ; /* high 32b */ \ + addl $4,%ebx ; \ + addl $(1 << 12),%eax ; /* increment physical address 1Gb */ \ + + /* set page dir ptr table addr */ + movl $(HPDPT), %eax movl %eax, %cr3 POSTCODE(0x3) movl %cr4,%eax - orl $(CR4_PSE),%eax + orl $(CR4_PAE|CR4_PGE|CR4_MCE),%eax movl %eax,%cr4 /* 
enable page size extensions */ + + movl $(MSR_IA32_EFER), %ecx /* MSR number in ecx */ + rdmsr /* MSR value return in edx: eax */ + orl $(MSR_IA32_EFER_NXE), %eax /* Set NXE bit in low 32-bits */ + wrmsr /* Update Extended Feature Enable reg */ + movl %cr0, %eax orl $(CR0_PG|CR0_WP|CR0_PE), %eax movl %eax, %cr0 /* ready paging */ POSTCODE(0x4) - lgdt PA(EXT(gdtptr)) /* load GDT */ - lidt PA(EXT(idtptr)) /* load IDT */ - + lgdt EXT(gdtptr) /* load GDT */ + lidt EXT(idtptr) /* load IDT */ + POSTCODE(0x5) LJMP (KERNEL_CS,EXT(hstart)) /* paging on and go to correct vaddr */ @@ -175,23 +206,22 @@ LEXT(hstart) xorl %eax, %eax /* Video memory - N/A */ pushl %eax + pushl %eax + pushl %eax mov %edi, %eax /* Pointer to hibernate header */ pushl %eax call EXT(hibernate_kernel_entrypoint) /* NOTREACHED */ hlt - - /* void hibernate_restore_phys_page(uint64_t src, uint64_t dst, uint32_t len, uint32_t procFlags); */ - .align 5 - .globl EXT(hibernate_restore_phys_page) + .align 5 + .globl EXT(hibernate_restore_phys_page) - /* XXX doesn't handle 64-bit addresses yet */ /* XXX can only deal with exactly one page */ LEXT(hibernate_restore_phys_page) pushl %edi @@ -199,35 +229,39 @@ LEXT(hibernate_restore_phys_page) movl 8+ 4(%esp),%esi /* source virtual address */ addl $0, %esi - jz 2f /* If source == 0, nothing to do */ + jz 3f /* If source == 0, nothing to do */ + movl 8+ 16(%esp),%eax /* destination physical address, high 32 bits */ + movl 8+ 12(%esp),%edi /* destination physical address, low 32 bits */ + addl $0, %eax + jne 1f /* need to map, above LAST_PAGE */ - movl 8+ 12(%esp),%edi /* destination physical address */ - cmpl $(LINEAR_KERNELBASE), %edi - jl 1f /* no need to map, below 0xC0000000 */ - + cmpl $(LAST_PAGE), %edi + jb 2f /* no need to map, below LAST_PAGE */ +1: + /* Map physical address %eax:%edi to virt. 
address LAST_PAGE (4GB - 2MB) */ + movl %eax, (HPTD + (LAST_PAGE_PDE * 8) + 4) movl %edi, %eax /* destination physical address */ - /* Map physical address to virt. address 0xffc00000 (4GB - 4MB) */ - andl $0xFFC00000, %eax + andl $(LAST_PAGE), %eax orl $(PTE_V | PTE_PS | PTE_W), %eax - movl %eax, (HPTD + (0x3FF * 4)) - orl $0xFFC00000, %edi + movl %eax, (HPTD + (LAST_PAGE_PDE * 8)) + orl $(LAST_PAGE), %edi invlpg (%edi) -1: +2: movl 8+ 20(%esp),%edx /* number of bytes */ cld -/* move longs*/ + /* move longs*/ movl %edx,%ecx sarl $2,%ecx rep movsl -/* move bytes*/ + /* move bytes*/ movl %edx,%ecx andl $3,%ecx rep movsb -2: +3: popl %esi popl %edi ret diff --git a/osfmk/i386/hpet.c b/osfmk/i386/hpet.c new file mode 100644 index 000000000..46ddefa7d --- /dev/null +++ b/osfmk/i386/hpet.c @@ -0,0 +1,428 @@ +/* + * Copyright (c) 2005-2006 Apple Computer, Inc. All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * The contents of this file constitute Original Code as defined in and + * are subject to the Apple Public Source License Version 1.1 (the + * "License"). You may not use this file except in compliance with the + * License. Please obtain a copy of the License at + * http://www.apple.com/publicsource and read it before using this file. + * + * This Original Code and all software distributed under the License are + * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the + * License for the specific language governing rights and limitations + * under the License. 
+ * + * @APPLE_LICENSE_HEADER_END@ + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#if MACH_KDB +#include +#include +#include +#include +#include +#include +#include +#endif /* MACH_KDB */ +#include + +/* Decimal powers: */ +#define kilo (1000ULL) +#define Mega (kilo * kilo) +#define Giga (kilo * Mega) +#define Tera (kilo * Giga) +#define Peta (kilo * Tera) + +uint32_t hpetArea = 0; +uint32_t hpetAreap = 0; +uint64_t hpetFemto = 0; +uint64_t hpetFreq = 0; +uint64_t hpetCvt = 0; /* (TAKE OUT LATER) */ +uint64_t hpetCvtt2n = 0; +uint64_t hpetCvtn2t = 0; +uint64_t tsc2hpet = 0; +uint64_t hpet2tsc = 0; +uint64_t bus2hpet = 0; +uint64_t hpet2bus = 0; + +uint32_t rcbaArea = 0; +uint32_t rcbaAreap = 0; + +#if DEBUG +#define DBG(x...) kprintf("DBG: " x) +#else +#define DBG(x...) +#endif + +/* + * Map the RCBA area. + */ +static void +map_rcbaArea(void) +{ + /* + * Get RCBA area physical address and map it + */ + outl(cfgAdr, lpcCfg | (0xF0 & 0xFC)); + rcbaAreap = inl(cfgDat | (0xF0 & 0x03)); + rcbaArea = io_map_spec(rcbaAreap & -4096, PAGE_SIZE * 4, VM_WIMG_IO); + kprintf("RCBA: vaddr = %08X, paddr = %08X\n", rcbaArea, rcbaAreap); +} + +/* + * Initialize the HPET + */ +void +hpet_init(void) +{ + unsigned int *xmod; + + map_rcbaArea(); + + /* + * Is the HPET memory already enabled? + * If not, set address and enable. + */ + xmod = (uint32_t *)(rcbaArea + 0x3404); /* Point to the HPTC */ + uint32_t hptc = *xmod; /* Get HPET config */ + DBG(" current RCBA.HPTC: %08X\n", *xmod); + if(!(hptc & hptcAE)) { + DBG("HPET memory is not enabled, " + "enabling and assigning to 0xFED00000 (hope that's ok)\n"); + *xmod = (hptc & ~3) | hptcAE; + } + + /* + * Get physical address of HPET and map it. 
+ */ + hpetAreap = hpetAddr | ((*xmod & 3) << 12); /* re-read HPTC: the enable path above rewrites the address-select bits */ + hpetArea = io_map_spec(hpetAreap & -4096, PAGE_SIZE * 4, VM_WIMG_IO); + kprintf("HPET: vaddr = %08X, paddr = %08X\n", hpetArea, hpetAreap); + + /* + * Extract the HPET tick rate. + * The period of the HPET is reported in femtoseconds (10**-15s) + * and is converted here to a frequency in hertz. + */ + hpetFemto = (uint32_t)(((hpetReg_t *)hpetArea)->GCAP_ID >> 32); + hpetFreq = (1 * Peta) / hpetFemto; + + /* + * The conversion factor is the number of nanoseconds per HPET tick + * with about 32 bits of fraction. The value is converted to a + * base-2 fixed point number. To convert from HPET to nanoseconds, + * multiply the value by the conversion factor using 96-bit arithmetic, + * then shift right 32 bits. If the value is known to be small, + * 64-bit arithmetic will work. + */ + + /* + * Begin conversion of base 10 femtoseconds to base 2, calculate: + * - HPET ticks to nanoseconds conversion in base 2 fraction (* 2**32) + * - nanoseconds to HPET ticks conversion + */ + hpetCvtt2n = (uint64_t)hpetFemto << 32; + hpetCvtt2n = hpetCvtt2n / 1000000ULL; + hpetCvtn2t = 0xFFFFFFFFFFFFFFFFULL / hpetCvtt2n; + kprintf("HPET: Frequency = %6d.%06dMHz, " + "cvtt2n = %08X.%08X, cvtn2t = %08X.%08X\n", + (uint32_t)(hpetFreq / Mega), (uint32_t)(hpetFreq % Mega), + (uint32_t)(hpetCvtt2n >> 32), (uint32_t)hpetCvtt2n, + (uint32_t)(hpetCvtn2t >> 32), (uint32_t)hpetCvtn2t); + + + /* (TAKE OUT LATER) + * Begin conversion of base 10 femtoseconds to base 2 + * HPET ticks to nanoseconds in base 2 fraction (times 1048576) + */ + hpetCvt = (uint64_t)hpetFemto << 20; + hpetCvt = hpetCvt / 1000000ULL; + + /* Calculate conversion from TSC to HPET */ + tsc2hpet = tmrCvt(tscFCvtt2n, hpetCvtn2t); + DBG(" CVT: TSC to HPET = %08X.%08X\n", + (uint32_t)(tsc2hpet >> 32), (uint32_t)tsc2hpet); + + /* Calculate conversion from HPET to TSC */ + hpet2tsc = tmrCvt(hpetCvtt2n, tscFCvtn2t); + DBG(" CVT: HPET to TSC = %08X.%08X\n", + (uint32_t)(hpet2tsc >> 32),
(uint32_t)hpet2tsc); + + /* Calculate conversion from BUS to HPET */ + bus2hpet = tmrCvt(busFCvtt2n, hpetCvtn2t); + DBG(" CVT: BUS to HPET = %08X.%08X\n", + (uint32_t)(bus2hpet >> 32), (uint32_t)bus2hpet); + + /* Calculate conversion from HPET to BUS */ + hpet2bus = tmrCvt(hpetCvtt2n, busFCvtn2t); + DBG(" CVT: HPET to BUS = %08X.%08X\n", + (uint32_t)(hpet2bus >> 32), (uint32_t)hpet2bus); + + /* Make sure the counter is off in the HPET configuration flags */ + uint64_t hpetcon = ((hpetReg_t *)hpetArea)->GEN_CONF; + hpetcon = hpetcon & ~1; + ((hpetReg_t *)hpetArea)->GEN_CONF = hpetcon; + + /* + * Convert current TSC to HPET value, + * set it, and start it ticking. + */ + uint64_t currtsc = rdtsc64(); + uint64_t tscInHPET = tmrCvt(currtsc, tsc2hpet); + ((hpetReg_t *)hpetArea)->MAIN_CNT = tscInHPET; + hpetcon = hpetcon | 1; + ((hpetReg_t *)hpetArea)->GEN_CONF = hpetcon; + kprintf("HPET started: TSC = %08X.%08X, HPET = %08X.%08X\n", + (uint32_t)(currtsc >> 32), (uint32_t)currtsc, + (uint32_t)(tscInHPET >> 32), (uint32_t)tscInHPET); + +#if MACH_KDB + db_display_hpet((hpetReg_t *)hpetArea); /* (BRINGUP) */ +#endif +} + +/* + * This routine is used to get various information about the HPET + * without having to export gobs of globals. It fills in a data + * structure with the info. 
+ */ +void +hpet_get_info(hpetInfo_t *info) +{ + info->hpetCvtt2n = hpetCvtt2n; + info->hpetCvtn2t = hpetCvtn2t; + info->tsc2hpet = tsc2hpet; + info->hpet2tsc = hpet2tsc; + info->bus2hpet = bus2hpet; + info->hpet2bus = hpet2bus; + info->rcbaArea = rcbaArea; + info->rcbaAreap = rcbaAreap; +} + + +/* + * This routine is called by the HPET driver + * when it assigns an HPET timer to a processor + */ + +void +ml_hpet_cfg(uint32_t cpu, uint32_t hpetVect) +{ + uint64_t *hpetVaddr; + uint64_t hpetcnf; + + if(cpu > 1) { + panic("ml_hpet_cfg: invalid cpu = %d\n", cpu); + } + + /* Calculate address of the HPET for this processor */ + hpetVaddr = (uint64_t *)(((uint32_t)&(((hpetReg_t *)hpetArea)->TIM1_CONF)) + (cpu << 5)); + + DBG("ml_hpet_cfg: HPET for cpu %d at %08X, vector = %d\n", + cpu, hpetVaddr, hpetVect); + + /* Save the address and vector of the HPET for this processor */ + cpu_data_ptr[cpu]->cpu_pmHpet = (uint64_t *)hpetVaddr; + cpu_data_ptr[cpu]->cpu_pmHpetVec = hpetVect; + + /* Enable the interruptions now that we have a vector */ + hpetcnf = *hpetVaddr; + hpetcnf = hpetcnf | Tn_INT_ENB_CNF; + *hpetVaddr = hpetcnf; + + /* Save the configuration */ + cpu_data_ptr[cpu]->cpu_pmStats.pmHpetCfg = hpetcnf; + cpu_data_ptr[cpu]->cpu_pmStats.pmHpetCmp = 0; + + /* See if nap policy has changed now */ + machine_nap_policy(); + +} + +/* + * This is the HPET interrupt handler. + * + * We really don't want to be here, but so far, I haven't figured out + * a way to cancel the interrupt. Hopefully, some day we will figure out + * how to do that or switch all timers to the HPET. + */ +int +HPETInterrupt(void) +{ + + /* All we do here is to bump the count */ + current_cpu_datap()->cpu_pmStats.pmHPETRupt++; + + /* Return and show that the 'rupt has been handled... 
*/ + return 1; +} + + +static hpetReg_t saved_hpet; + +void hpet_save( void ) +{ + hpetReg_t *from = (hpetReg_t *) hpetArea; + hpetReg_t *to = &saved_hpet; + + to->GEN_CONF = from->GEN_CONF; + to->TIM0_CONF = from->TIM0_CONF; + to->TIM0_COMP = from->TIM0_COMP; + to->TIM1_CONF = from->TIM1_CONF; + to->TIM1_COMP = from->TIM1_COMP; + to->TIM2_CONF = from->TIM2_CONF; + to->TIM2_COMP = from->TIM2_COMP; + to->MAIN_CNT = from->MAIN_CNT; +} + +void hpet_restore( void ) +{ + hpetReg_t *from = &saved_hpet; + hpetReg_t *to = (hpetReg_t *) hpetArea; + + /* + * Is the HPET memory already enabled? + * If not, set address and enable. + */ + uint32_t *hptcp = (uint32_t *)(rcbaArea + 0x3404); + uint32_t hptc = *hptcp; + if(!(hptc & hptcAE)) { + DBG("HPET memory is not enabled, " + "enabling and assigning to 0xFED00000 (hope that's ok)\n"); + *hptcp = (hptc & ~3) | hptcAE; + } + + to->GEN_CONF = from->GEN_CONF & ~1; + + to->TIM0_CONF = from->TIM0_CONF; + to->TIM0_COMP = from->TIM0_COMP; + to->TIM1_CONF = from->TIM1_CONF; + to->TIM1_COMP = from->TIM1_COMP; + to->TIM2_CONF = from->TIM2_CONF; + to->TIM2_COMP = from->TIM2_COMP; + to->GINTR_STA = -1ULL; + to->MAIN_CNT = from->MAIN_CNT; + + to->GEN_CONF = from->GEN_CONF; +} + +/* + * Read the HPET timer + * + */ +uint64_t +rdHPET(void) +{ + hpetReg_t *hpetp = (hpetReg_t *) hpetArea; + volatile uint32_t *regp = (uint32_t *) &hpetp->MAIN_CNT; + uint32_t high; + uint32_t low; + + do { + high = *(regp + 1); + low = *regp; + } while (high != *(regp + 1)); + + return (((uint64_t) high) << 32) | low; +} + +#if MACH_KDB + +#define HI32(x) ((uint32_t)(((x) >> 32) & 0xFFFFFFFF)) +#define LO32(x) ((uint32_t)((x) & 0xFFFFFFFF)) + +/* + * Displays HPET memory mapped area + * hp + */ +void +db_hpet(__unused db_expr_t addr, __unused int have_addr, __unused db_expr_t count, __unused char *modif) +{ + + db_display_hpet((hpetReg_t *) hpetArea); /* Dump out the HPET + * stuff */ + return; +} + +void +db_display_hpet(hpetReg_t * hpt) +{ + + uint64_t cmain; + 
+ cmain = hpt->MAIN_CNT; /* Get the main timer */ + + /* General capabilities */ + db_printf(" GCAP_ID = %08X.%08X\n", + HI32(hpt->GCAP_ID), LO32(hpt->GCAP_ID)); + /* General configuration */ + db_printf(" GEN_CONF = %08X.%08X\n", + HI32(hpt->GEN_CONF), LO32(hpt->GEN_CONF)); + /* General Interrupt status */ + db_printf("GINTR_STA = %08X.%08X\n", + HI32(hpt->GINTR_STA), LO32(hpt->GINTR_STA)); + /* Main counter */ + db_printf(" MAIN_CNT = %08X.%08X\n", + HI32(cmain), LO32(cmain)); + /* Timer 0 config and cap */ + db_printf("TIM0_CONF = %08X.%08X\n", + HI32(hpt->TIM0_CONF), LO32(hpt->TIM0_CONF)); + /* Timer 0 comparator */ + db_printf("TIM0_COMP = %08X.%08X\n", + HI32(hpt->TIM0_COMP), LO32(hpt->TIM0_COMP)); + /* Timer 1 config and cap */ + db_printf("TIM1_CONF = %08X.%08X\n", + HI32(hpt->TIM1_CONF), LO32(hpt->TIM1_CONF)); + /* Timer 1 comparator */ + db_printf("TIM1_COMP = %08X.%08X\n", + HI32(hpt->TIM1_COMP), LO32(hpt->TIM1_COMP)); + /* Timer 2 config and cap */ + db_printf("TIM2_CONF = %08X.%08X\n", + HI32(hpt->TIM2_CONF), LO32(hpt->TIM2_CONF)); + /* Timer 2 comparator */ + db_printf("TIM2_COMP = %08X.%08X\n", + HI32(hpt->TIM2_COMP), LO32(hpt->TIM2_COMP)); + + /* Fractional MHz is the remainder modulo 1,000,000, so it needs six zero-padded digits */ + db_printf("\nHPET Frequency = %d.%06dMHz\n", + (uint32_t) (hpetFreq / 1000000), (uint32_t) (hpetFreq % 1000000)); + + return; + +} + +#endif diff --git a/osfmk/i386/hpet.h b/osfmk/i386/hpet.h new file mode 100644 index 000000000..f3595dce1 --- /dev/null +++ b/osfmk/i386/hpet.h @@ -0,0 +1,102 @@ +/* + * Copyright (c) 2006 Apple Computer, Inc. All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * The contents of this file constitute Original Code as defined in and + * are subject to the Apple Public Source License Version 1.1 (the + * "License"). You may not use this file except in compliance with the + * License. Please obtain a copy of the License at + * http://www.apple.com/publicsource and read it before using this file.
+ * + * This Original Code and all software distributed under the License are + * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the + * License for the specific language governing rights and limitations + * under the License. + * + * @APPLE_LICENSE_HEADER_END@ + */ +#ifdef KERNEL_PRIVATE +#ifndef _I386_HPET_H_ +#define _I386_HPET_H_ + +/* + * HPET kernel functions to support the HPET KEXT and the + * power management KEXT. + */ + + +/* + * Memory mapped registers for the HPET + */ +typedef struct hpetReg { + uint64_t GCAP_ID; /* General capabilities */ + uint64_t rsv1; + uint64_t GEN_CONF; /* General configuration */ + uint64_t rsv2; + uint64_t GINTR_STA; /* General Interrupt status */ + uint64_t rsv3[25]; + uint64_t MAIN_CNT; /* Main counter */ + uint64_t rsv4; + uint64_t TIM0_CONF; /* Timer 0 config and cap */ +#define TIM_CONF 0 +#define Tn_INT_ENB_CNF 4 + uint64_t TIM0_COMP; /* Timer 0 comparator */ +#define TIM_COMP 8 + uint64_t rsv5[2]; + uint64_t TIM1_CONF; /* Timer 1 config and cap */ + uint64_t TIM1_COMP; /* Timer 1 comparator */ + uint64_t rsv6[2]; + uint64_t TIM2_CONF; /* Timer 2 config and cap */ + uint64_t TIM2_COMP; /* Timer 2 comparator */ + uint64_t rsv7[2]; +} hpetReg; +typedef struct hpetReg hpetReg_t; + +struct hpetInfo +{ + uint64_t hpetCvtt2n; + uint64_t hpetCvtn2t; + uint64_t tsc2hpet; + uint64_t hpet2tsc; + uint64_t bus2hpet; + uint64_t hpet2bus; + uint32_t rcbaArea; + uint32_t rcbaAreap; +}; +typedef struct hpetInfo hpetInfo_t; + +extern uint64_t hpetFemto; +extern uint64_t hpetFreq; +extern uint64_t hpetCvtt2n; +extern uint64_t hpetCvtn2t; +extern uint64_t tsc2hpet; +extern uint64_t hpet2tsc; +extern uint64_t bus2hpet; +extern uint64_t hpet2bus; + +extern uint32_t rcbaArea; +extern uint32_t rcbaAreap; + +extern 
void map_rcbaAread(void); +extern void hpet_init(void); + +extern void hpet_save(void); +extern void hpet_restore(void); + +#ifdef XNU_KERNEL_PRIVATE +extern int HPETInterrupt(void); +#endif + +extern uint64_t rdHPET(void); +extern void hpet_get_info(hpetInfo_t *info); + +#define hpetAddr 0xFED00000 +#define hptcAE 0x80 + +#endif /* _I386_HPET_H_ */ + +#endif /* KERNEL_PRIVATE */ diff --git a/osfmk/i386/i386_init.c b/osfmk/i386/i386_init.c index 78d4cef00..46cd0b4f7 100644 --- a/osfmk/i386/i386_init.c +++ b/osfmk/i386/i386_init.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2003 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2003-2005 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -50,7 +50,6 @@ #include #include -#include #include @@ -65,120 +64,100 @@ #include #include #include +#include #include #include #include +#include #include #include #include #include #include #include -#include #include #include #include +#include #include #include +#include +#include +#include +#include #if MACH_KDB #include #endif /* MACH_KDB */ #include -#ifdef __MACHO__ -#include -static KernelBootArgs_t *kernelBootArgs; -#endif +static boot_args *kernelBootArgs; -vm_offset_t boot_args_start = 0; /* pointer to kernel arguments, set in start.s */ - -#ifdef __MACHO__ -#include -vm_offset_t edata, etext, end; +extern int disableConsoleOutput; +extern const char version[]; +extern const char version_variant[]; +extern int nx_enabled; -/* operations only against currently loaded 32 bit mach kernel */ -extern struct segment_command *getsegbyname(const char *); -extern struct section *firstsect(struct segment_command *); -extern struct section *nextsect(struct segment_command *, struct section *); +extern int noVMX; /* if set, rosetta should not emulate altivec */ -/* - * Called first for a mach-o kernel before paging is set up. - * Returns the first available physical address in memory. 
- */ +void cpu_stack_set(void); -void -i386_preinit(void) -{ - struct segment_command *sgp; - struct section *sp; - struct KernelBootArgs *pp; - int i; - - sgp = getsegbyname("__DATA"); - if (sgp) { - sp = firstsect(sgp); - if (sp) { - do { - if ((sp->flags & S_ZEROFILL)) - bzero((char *) sp->addr, sp->size); - } while ((sp = nextsect(sgp, sp))); - } - } - - kernelBootArgs = (KernelBootArgs_t *) - ml_static_ptovirt(boot_args_start); - pp = (struct KernelBootArgs *) kernelBootArgs; - pp->configEnd = (char *) - ml_static_ptovirt((vm_offset_t) pp->configEnd); - for (i = 0; i < pp->numBootDrivers; i++) { - pp->driverConfig[i].address = (unsigned) - ml_static_ptovirt(pp->driverConfig[i].address); - } - return; -} -#endif - -extern const char version[]; -extern const char version_variant[]; /* * Cpu initialization. Running virtual, but without MACH VM - * set up. First C routine called, unless i386_preinit() was called first. + * set up. First C routine called. */ void -i386_init(void) +i386_init(vm_offset_t boot_args_start) { unsigned int maxmem; + uint64_t maxmemtouse; unsigned int cpus; + boolean_t legacy_mode; postcode(I386_INIT_ENTRY); - master_cpu = 0; - cpu_data_alloc(TRUE); - cpu_init(); - postcode(CPU_INIT_D); + i386_macho_zerofill(); /* - * Setup some processor related structures to satisfy funnels. - * Must be done before using unparallelized device drivers. + * Setup boot args given the physical start address. 
*/ - processor_bootstrap(); + kernelBootArgs = (boot_args *) + ml_static_ptovirt(boot_args_start); + kernelBootArgs->MemoryMap = (uint32_t) + ml_static_ptovirt((vm_offset_t)kernelBootArgs->MemoryMap); + kernelBootArgs->deviceTreeP = (uint32_t) + ml_static_ptovirt((vm_offset_t)kernelBootArgs->deviceTreeP); + + master_cpu = 0; + (void) cpu_data_alloc(TRUE); + cpu_init(); + postcode(CPU_INIT_D); + /* init processor performance control */ + pmsInit(); + PE_init_platform(FALSE, kernelBootArgs); postcode(PE_INIT_PLATFORM_D); - /* - * Set up initial thread so current_thread() works early on - */ - thread_bootstrap(); - postcode(THREAD_BOOTSTRAP_D); - printf_init(); /* Init this in case we need debugger */ panic_init(); /* Init this in case we need debugger */ /* setup debugging output if one has been chosen */ PE_init_kprintf(FALSE); + if (!PE_parse_boot_arg("diag", &dgWork.dgFlags)) + dgWork.dgFlags = 0; + + serialmode = 0; + if(PE_parse_boot_arg("serial", &serialmode)) { + /* We want a serial keyboard and/or console */ + kprintf("Serial mode specified: %08X\n", serialmode); + } + if(serialmode & 1) { + (void)switch_to_serial_console(); + disableConsoleOutput = FALSE; /* Allow printfs to happen */ + } + /* setup console output */ PE_init_printf(FALSE); @@ -190,22 +169,55 @@ i386_init(void) * The maximum number of cpus must be set beforehand. */ if (!PE_parse_boot_arg("maxmem", &maxmem)) - maxmem=0; + maxmemtouse=0; else - maxmem = maxmem * (1024 * 1024); + maxmemtouse = ((uint64_t)maxmem) * (uint64_t)(1024 * 1024); if (PE_parse_boot_arg("cpus", &cpus)) { if ((0 < cpus) && (cpus < max_ncpus)) max_ncpus = cpus; } - i386_vm_init(maxmem, kernelBootArgs); + /* + * debug support for > 4G systems + */ + if (!PE_parse_boot_arg("himemory_mode", &vm_himemory_mode)) + vm_himemory_mode = 0; + + /* + * At this point we check whether we are a 64-bit processor + * and that we're not restricted to legacy mode, 32-bit operation. 
+ */ + boolean_t IA32e = FALSE; + if (cpuid_extfeatures() & CPUID_EXTFEATURE_EM64T) { + kprintf("EM64T supported"); + if (PE_parse_boot_arg("-legacy", &legacy_mode)) { + kprintf(" but legacy mode forced\n"); + } else { + IA32e = TRUE; + kprintf(" and will be enabled\n"); + } + } + if (!(cpuid_extfeatures() & CPUID_EXTFEATURE_XD)) + nx_enabled = 0; + + i386_vm_init(maxmemtouse, IA32e, kernelBootArgs); + + if ( ! PE_parse_boot_arg("novmx", &noVMX)) + noVMX = 0; /* OK to support Altivec in rosetta? */ + + tsc_init(); + hpet_init(); + power_management_init(); PE_init_platform(TRUE, kernelBootArgs); /* create the console for verbose or pretty mode */ PE_create_console(); + processor_bootstrap(); + thread_bootstrap(); + machine_startup(); } diff --git a/osfmk/i386/i386_lock.s b/osfmk/i386/i386_lock.s index d8d36ebbd..149cf370b 100644 --- a/osfmk/i386/i386_lock.s +++ b/osfmk/i386/i386_lock.s @@ -124,11 +124,6 @@ /* * Routines for general lock debugging. */ -#define S_TYPE SLOCK_TYPE(%edx) -#define S_PC SLOCK_PC(%edx) -#define S_THREAD SLOCK_THREAD(%edx) -#define S_DURATIONH SLOCK_DURATIONH(%edx) -#define S_DURATIONL SLOCK_DURATIONL(%edx) /* * Checks for expected lock types and calls "panic" on @@ -146,17 +141,6 @@ .text ; \ 1: -#define CHECK_SIMPLE_LOCK_TYPE() \ - cmpl $ USLOCK_TAG,S_TYPE ; \ - je 1f ; \ - pushl $2f ; \ - call EXT(panic) ; \ - hlt ; \ - .data ; \ -2: String "not a simple lock!" ; \ - .text ; \ -1: - /* * If one or more simplelocks are currently held by a thread, * an attempt to acquire a mutex will cause this check to fail @@ -261,8 +245,9 @@ LEAF_ENTRY(hw_lock_init) LEAF_ENTRY(hw_lock_lock) movl L_ARG0,%edx /* fetch lock pointer */ - movl L_PC,%ecx -1: DISABLE_PREEMPTION + movl %gs:CPU_ACTIVE_THREAD,%ecx + DISABLE_PREEMPTION +1: movl 0(%edx), %eax testl %eax,%eax /* lock locked? 
*/ jne 3f /* branch if so */ @@ -270,8 +255,7 @@ LEAF_ENTRY(hw_lock_lock) jne 3f movl $1,%eax /* In case this was a timeout call */ LEAF_RET /* if yes, then nothing left to do */ - -3: ENABLE_PREEMPTION /* no reason we can't be preemptable */ +3: PAUSE /* pause for hyper-threading */ jmp 1b /* try again */ @@ -284,7 +268,7 @@ LEAF_ENTRY(hw_lock_lock) LEAF_ENTRY(hw_lock_to) 1: movl L_ARG0,%edx /* fetch lock pointer */ - movl L_PC,%ecx + movl %gs:CPU_ACTIVE_THREAD,%ecx /* * Attempt to grab the lock immediately * - fastpath without timeout nonsense. @@ -315,8 +299,6 @@ LEAF_ENTRY(hw_lock_to) adcl $0,%edx /* add carry */ mov %edx,%ecx mov %eax,%ebx /* %ecx:%ebx is the timeout expiry */ -3: - ENABLE_PREEMPTION /* no reason not to be preempted now */ 4: /* * The inner-loop spin to look for the lock being freed. @@ -337,7 +319,7 @@ LEAF_ENTRY(hw_lock_to) cmpl %ecx,%edx /* compare high-order 32-bits */ jb 4b /* continue spinning if less, or */ cmpl %ebx,%eax /* compare low-order 32-bits */ - jb 5b /* continue if less, else bail */ + jb 4b /* continue if less, else bail */ xor %eax,%eax /* with 0 return value */ pop %ebx pop %edi @@ -348,10 +330,9 @@ LEAF_ENTRY(hw_lock_to) * Here to try to grab the lock that now appears to be free * after contention. */ - movl 8+L_PC,%edx /* calling pc (8+ for pushed regs) */ - DISABLE_PREEMPTION + movl %gs:CPU_ACTIVE_THREAD,%edx lock; cmpxchgl %edx,0(%edi) /* try to acquire the HW lock */ - jne 3b /* no - spin again */ + jne 4b /* no - spin again */ movl $1,%eax /* yes */ pop %ebx pop %edi @@ -376,7 +357,7 @@ LEAF_ENTRY(hw_lock_unlock) LEAF_ENTRY(hw_lock_try) movl L_ARG0,%edx /* fetch lock pointer */ - movl L_PC,%ecx + movl %gs:CPU_ACTIVE_THREAD,%ecx DISABLE_PREEMPTION movl 0(%edx),%eax testl %eax,%eax @@ -387,7 +368,8 @@ LEAF_ENTRY(hw_lock_try) movl $1,%eax /* success */ LEAF_RET -1: ENABLE_PREEMPTION /* failure: release preemption... */ +1: + ENABLE_PREEMPTION /* failure: release preemption... 
*/ xorl %eax,%eax /* ...and return failure */ LEAF_RET @@ -431,23 +413,21 @@ NONLEAF_ENTRY2(mutex_lock,_mutex_lock) pushf /* save interrupt state */ cli /* disable interrupts */ +Lml_retry: + movl %gs:CPU_ACTIVE_THREAD,%ecx -ml_retry: - movl B_PC,%ecx - -ml_get_hw: +Lml_get_hw: movl M_ILK,%eax /* read interlock */ testl %eax,%eax /* unlocked? */ - je 1f /* yes - attempt to lock it */ - PAUSE /* no - pause */ - jmp ml_get_hw /* try again */ -1: + jne Lml_ilk_fail /* no - take the slow path */ + lock; cmpxchgl %ecx,M_ILK /* atomic compare and exchange */ - jne ml_get_hw /* branch on failure to retry */ + jne Lml_get_hw /* branch on failure to retry */ movl M_LOCKED,%ecx /* get lock owner */ testl %ecx,%ecx /* is the mutex locked? */ - jne ml_fail /* yes, we lose */ + jne Lml_fail /* yes, we lose */ +Lml_acquire: movl %gs:CPU_ACTIVE_THREAD,%ecx movl %ecx,M_LOCKED @@ -457,28 +437,94 @@ ml_get_hw: movl %ecx,M_PC #endif + cmpw $0,M_WAITERS /* are there any waiters? */ + jne Lml_waiters /* yes, more work to do */ +Lml_return: + xorl %eax,%eax + movl %eax,M_ILK + + popf /* restore interrupt state */ + + NONLEAF_RET + +Lml_waiters: pushl %edx /* save mutex address */ pushl %edx call EXT(lck_mtx_lock_acquire) addl $4,%esp popl %edx /* restore mutex address */ + jmp Lml_return + +Lml_ilk_fail: + /* + * Slow path: call out to do the spinning. + */ + pushl %edx /* lock address */ + call EXT(lck_mtx_interlock_spin) + popl %edx /* lock pointer */ + jmp Lml_retry /* try again */ +Lml_fail: + /* + * Check if the owner is on another processor and therefore + * we should try to spin before blocking. + */ + testl $(OnProc),ACT_SPF(%ecx) + jz Lml_block + + /* + * Here if owner is on another processor: + * - release the interlock + * - spin on the holder until release or timeout + * - in either case re-acquire the interlock + * - if released, acquire it + * - otherwise drop thru to block.
+ */ xorl %eax,%eax - movl %eax,M_ILK + movl %eax,M_ILK /* zero interlock */ + popf + pushf /* restore interrupt state */ - popf /* restore interrupt state */ + push %edx /* lock address */ + call EXT(lck_mtx_lock_spin) /* call out to do spinning */ + addl $4,%esp + movl B_ARG0,%edx /* refetch mutex address */ - NONLEAF_RET + /* Re-acquire interlock */ + cli /* disable interrupts */ +Lml_reget_retry: + movl %gs:CPU_ACTIVE_THREAD,%ecx -ml_fail: -ml_block: +Lml_reget_hw: + movl M_ILK,%eax /* read interlock */ + testl %eax,%eax /* unlocked? */ + jne Lml_ilk_refail /* no - slow path */ + + lock; cmpxchgl %ecx,M_ILK /* atomic compare and exchange */ + jne Lml_reget_hw /* branch on failure to retry */ + + movl M_LOCKED,%ecx /* get lock owner */ + testl %ecx,%ecx /* is the mutex free? */ + je Lml_acquire /* yes, acquire */ + +Lml_block: CHECK_MYLOCK(M_THREAD) pushl M_LOCKED pushl %edx /* push mutex address */ call EXT(lck_mtx_lock_wait) /* wait for the lock */ addl $8,%esp movl B_ARG0,%edx /* refetch mutex address */ - jmp ml_retry /* and try again */ + cli /* ensure interrupts disabled */ + jmp Lml_retry /* and try again */ + +Lml_ilk_refail: + /* + * Slow path: call out to do the spinning. + */ + pushl %edx /* lock address */ + call EXT(lck_mtx_interlock_spin) + popl %edx /* lock pointer */ + jmp Lml_reget_retry /* try again */ NONLEAF_ENTRY2(mutex_try,_mutex_try) @@ -487,24 +533,22 @@ NONLEAF_ENTRY2(mutex_try,_mutex_try) CHECK_MUTEX_TYPE() CHECK_NO_SIMPLELOCKS() - movl B_PC,%ecx - pushf /* save interrupt state */ cli /* disable interrupts */ +Lmt_retry: + movl %gs:CPU_ACTIVE_THREAD,%ecx -mt_get_hw: +Lmt_get_hw: movl M_ILK,%eax /* read interlock */ testl %eax,%eax /* unlocked? 
*/ - je 1f /* yes - attempt to lock it */ - PAUSE /* no - pause */ - jmp mt_get_hw /* try again */ -1: + jne Lmt_ilk_fail /* no - slow path */ + lock; cmpxchgl %ecx,M_ILK /* atomic compare and exchange */ - jne mt_get_hw /* branch on failure to retry */ + jne Lmt_get_hw /* branch on failure to retry */ movl M_LOCKED,%ecx /* get lock owner */ testl %ecx,%ecx /* is the mutex locked? */ - jne mt_fail /* yes, we lose */ + jne Lmt_fail /* yes, we lose */ movl %gs:CPU_ACTIVE_THREAD,%ecx movl %ecx,M_LOCKED @@ -514,22 +558,35 @@ mt_get_hw: movl %ecx,M_PC #endif - pushl %edx /* save mutex address */ - pushl %edx - call EXT(lck_mtx_lock_acquire) - addl $4,%esp - popl %edx /* restore mutex address */ - + cmpl $0,M_WAITERS /* are there any waiters? */ + jne Lmt_waiters /* yes, more work to do */ +Lmt_return: xorl %eax,%eax movl %eax,M_ILK - popf /* restore interrupt state */ movl $1,%eax NONLEAF_RET -mt_fail: +Lmt_waiters: + pushl %edx /* save mutex address */ + pushl %edx + call EXT(lck_mtx_lock_acquire) + addl $4,%esp + popl %edx /* restore mutex address */ + jmp Lmt_return + +Lmt_ilk_fail: + /* + * Slow path: call out to do the spinning. + */ + pushl %edx /* lock address */ + call EXT(lck_mtx_interlock_spin) + popl %edx /* lock pointer */ + jmp Lmt_retry /* try again */ + +Lmt_fail: xorl %eax,%eax movl %eax,M_ILK @@ -545,25 +602,23 @@ NONLEAF_ENTRY(mutex_unlock) CHECK_MUTEX_TYPE() CHECK_THREAD(M_THREAD) - movl B_PC,%ecx - pushf /* save interrupt state */ cli /* disable interrupts */ +Lmu_retry: + movl %gs:CPU_ACTIVE_THREAD,%ecx -mu_get_hw: +Lmu_get_hw: movl M_ILK,%eax /* read interlock */ testl %eax,%eax /* unlocked? */ - je 1f /* yes - attempt to lock it */ - PAUSE /* no - pause */ - jmp mu_get_hw /* try again */ -1: + jne Lmu_ilk_fail /* no - slow path */ + lock; cmpxchgl %ecx,M_ILK /* atomic compare and exchange */ - jne mu_get_hw /* branch on failure to retry */ + jne Lmu_get_hw /* branch on failure to retry */ cmpw $0,M_WAITERS /* are there any waiters? 
*/ - jne mu_wakeup /* yes, more work to do */ + jne Lmu_wakeup /* yes, more work to do */ -mu_doit: +Lmu_doit: #if MACH_LDEBUG movl $0,M_THREAD /* disown thread */ @@ -578,13 +633,22 @@ mu_doit: NONLEAF_RET -mu_wakeup: +Lmu_ilk_fail: + /* + * Slow path: call out to do the spinning. + */ + pushl %edx /* lock address */ + call EXT(lck_mtx_interlock_spin) + popl %edx /* lock pointer */ + jmp Lmu_retry /* try again */ + +Lmu_wakeup: pushl M_LOCKED pushl %edx /* push mutex address */ call EXT(lck_mtx_unlock_wakeup)/* yes, wake a thread */ addl $8,%esp movl B_ARG0,%edx /* restore lock pointer */ - jmp mu_doit + jmp Lmu_doit /* * lck_mtx_lock() @@ -605,40 +669,96 @@ NONLEAF_ENTRY(lck_mtx_lock) pushf /* save interrupt state */ cli /* disable interrupts */ +Llml_retry: + movl %gs:CPU_ACTIVE_THREAD,%ecx -lml_retry: - movl B_PC,%ecx - -lml_get_hw: +Llml_get_hw: movl M_ILK,%eax /* read interlock */ testl %eax,%eax /* unlocked? */ - je 1f /* yes - attempt to lock it */ - PAUSE /* no - pause */ - jmp lml_get_hw /* try again */ -1: + jne Llml_ilk_fail /* no - slow path */ + lock; cmpxchgl %ecx,M_ILK /* atomic compare and exchange */ - jne lml_get_hw /* branch on failure to retry */ + jne Llml_get_hw /* branch on failure to retry */ movl M_LOCKED,%ecx /* get lock owner */ testl %ecx,%ecx /* is the mutex locked? */ - jne lml_fail /* yes, we lose */ + jne Llml_fail /* yes, we lose */ +Llml_acquire: movl %gs:CPU_ACTIVE_THREAD,%ecx movl %ecx,M_LOCKED + cmpl $0,M_WAITERS /* are there any waiters? */ + jne Llml_waiters /* yes, more work to do */ +Llml_return: + xorl %eax,%eax + movl %eax,M_ILK + + popf /* restore interrupt state */ + + NONLEAF_RET + +Llml_waiters: pushl %edx /* save mutex address */ pushl %edx call EXT(lck_mtx_lock_acquire) addl $4,%esp popl %edx /* restore mutex address */ + jmp Llml_return + +Llml_ilk_fail: + /* + * Slow path: call out to do the spinning. 
+ */ + pushl %edx /* lock address */ + call EXT(lck_mtx_interlock_spin) + popl %edx /* lock pointer */ + jmp Llml_retry /* try again */ +Llml_fail: + /* + * Check if the owner is on another processor and therefore + * we should try to spin before blocking. + */ + testl $(OnProc),ACT_SPF(%ecx) + jz Llml_block + + /* + * Here if owner is on another processor: + * - release the interlock + * - spin on the holder until release or timeout + * - in either case re-acquire the interlock + * - if released, acquire it + * - otherwise drop thru to block. + */ xorl %eax,%eax - movl %eax,M_ILK + movl %eax,M_ILK /* zero interlock */ + popf + pushf /* restore interrupt state */ - popf /* restore interrupt state */ + pushl %edx /* save mutex address */ + pushl %edx + call EXT(lck_mtx_lock_spin) + addl $4,%esp + popl %edx /* restore mutex address */ - NONLEAF_RET + /* Re-acquire interlock */ + cli /* disable interrupts */ +Llml_reget_retry: + movl %gs:CPU_ACTIVE_THREAD,%ecx -lml_fail: +Llml_reget_hw: + movl M_ILK,%eax /* read interlock */ + testl %eax,%eax /* unlocked? */ + jne Llml_ilk_refail /* no - slow path */ + + lock; cmpxchgl %ecx,M_ILK /* atomic compare and exchange */ + jne Llml_reget_hw /* branch on failure to retry */ + + movl M_LOCKED,%ecx /* get lock owner */ + testl %ecx,%ecx /* is the mutex free? */ + je Llml_acquire /* yes, acquire */ + +Llml_block: CHECK_MYLOCK(M_THREAD) pushl %edx /* save mutex address */ pushl M_LOCKED @@ -646,7 +766,17 @@ lml_fail: call EXT(lck_mtx_lock_wait) /* wait for the lock */ addl $8,%esp popl %edx /* restore mutex address */ - jmp lml_retry /* and try again */ + cli /* ensure interrupts disabled */ + jmp Llml_retry /* and try again */ + +Llml_ilk_refail: + /* + * Slow path: call out to do the spinning. 
+ */ + pushl %edx /* lock address */ + call EXT(lck_mtx_interlock_spin) + popl %edx /* lock pointer */ + jmp Llml_reget_retry /* try again */ NONLEAF_ENTRY(lck_mtx_try_lock) @@ -657,33 +787,28 @@ NONLEAF_ENTRY(lck_mtx_try_lock) CHECK_NO_SIMPLELOCKS() CHECK_PREEMPTION_LEVEL() - movl B_PC,%ecx - pushf /* save interrupt state */ cli /* disable interrupts */ +Llmt_retry: + movl %gs:CPU_ACTIVE_THREAD,%ecx -lmt_get_hw: +Llmt_get_hw: movl M_ILK,%eax /* read interlock */ testl %eax,%eax /* unlocked? */ - je 1f /* yes - attempt to lock it */ - PAUSE /* no - pause */ - jmp lmt_get_hw /* try again */ -1: + jne Llmt_ilk_fail /* no - slow path */ + lock; cmpxchgl %ecx,M_ILK /* atomic compare and exchange */ - jne lmt_get_hw /* branch on failure to retry */ + jne Llmt_get_hw /* branch on failure to retry */ movl M_LOCKED,%ecx /* get lock owner */ testl %ecx,%ecx /* is the mutex locked? */ - jne lmt_fail /* yes, we lose */ + jne Llmt_fail /* yes, we lose */ movl %gs:CPU_ACTIVE_THREAD,%ecx movl %ecx,M_LOCKED - pushl %edx /* save mutex address */ - pushl %edx - call EXT(lck_mtx_lock_acquire) - addl $4,%esp - popl %edx /* restore mutex address */ - + cmpl $0,M_WAITERS /* are there any waiters? */ + jne Llmt_waiters /* yes, more work to do */ +Llmt_return: xorl %eax,%eax movl %eax,M_ILK @@ -692,7 +817,24 @@ lmt_get_hw: movl $1,%eax /* return success */ NONLEAF_RET -lmt_fail: +Llmt_waiters: + pushl %edx /* save mutex address */ + pushl %edx + call EXT(lck_mtx_lock_acquire) + addl $4,%esp + popl %edx /* restore mutex address */ + jmp Llmt_return + +Llmt_ilk_fail: + /* + * Slow path: call out to do the spinning. + */ + pushl %edx /* lock address */ + call EXT(lck_mtx_interlock_spin) + popl %edx /* lock pointer */ + jmp Llmt_retry /* try again */ + +Llmt_fail: xorl %eax,%eax movl %eax,M_ILK @@ -707,25 +849,23 @@ NONLEAF_ENTRY(lck_mtx_unlock) cmpl $(MUTEX_IND),M_ITAG /* is this indirect? 
*/ cmove M_PTR,%edx /* yes - take indirection */ - movl B_PC,%ecx - pushf /* save interrupt state */ cli /* disable interrupts */ +Llmu_retry: + movl %gs:CPU_ACTIVE_THREAD,%ecx -lmu_get_hw: +Llmu_get_hw: movl M_ILK,%eax /* read interlock */ testl %eax,%eax /* unlocked? */ - je 1f /* yes - attempt to lock it */ - PAUSE /* no - pause */ - jmp lmu_get_hw /* try again */ -1: + jne Llmu_ilk_fail /* no - slow path */ + lock; cmpxchgl %ecx,M_ILK /* atomic compare and exchange */ - jne lmu_get_hw /* branch on failure to retry */ + jne Llmu_get_hw /* branch on failure to retry */ cmpw $0,M_WAITERS /* are there any waiters? */ - jne lmu_wakeup /* yes, more work to do */ + jne Llmu_wakeup /* yes, more work to do */ -lmu_doit: +Llmu_doit: xorl %ecx,%ecx movl %ecx,M_LOCKED /* unlock the mutex */ @@ -735,14 +875,23 @@ lmu_doit: NONLEAF_RET -lmu_wakeup: +Llmu_ilk_fail: + /* + * Slow path: call out to do the spinning. + */ + pushl %edx /* lock address */ + call EXT(lck_mtx_interlock_spin) + popl %edx /* lock pointer */ + jmp Llmu_retry /* try again */ + +Llmu_wakeup: pushl %edx /* save mutex address */ pushl M_LOCKED pushl %edx /* push mutex address */ call EXT(lck_mtx_unlock_wakeup)/* yes, wake a thread */ addl $8,%esp popl %edx /* restore mutex pointer */ - jmp lmu_doit + jmp Llmu_doit LEAF_ENTRY(lck_mtx_ilk_unlock) movl L_ARG0,%edx /* no indirection here */ @@ -840,14 +989,14 @@ LEAF_ENTRY(i_bit_set) movl L_ARG0,%edx movl L_ARG1,%eax lock - bts %dl,(%eax) + bts %edx,(%eax) LEAF_RET LEAF_ENTRY(i_bit_clear) movl L_ARG0,%edx movl L_ARG1,%eax lock - btr %dl,(%eax) + btr %edx,(%eax) LEAF_RET LEAF_ENTRY(bit_lock) diff --git a/iokit/Drivers/platform/drvAppleIntelClock/AppleIntelClock.h b/osfmk/i386/i386_lowmem.h similarity index 72% rename from iokit/Drivers/platform/drvAppleIntelClock/AppleIntelClock.h rename to osfmk/i386/i386_lowmem.h index b3ce6e92b..8a1fe906a 100644 --- a/iokit/Drivers/platform/drvAppleIntelClock/AppleIntelClock.h +++ b/osfmk/i386/i386_lowmem.h @@ -1,5 +1,5 @@ /* 
- * Copyright (c) 1998-2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2005 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -20,17 +20,17 @@ * @APPLE_LICENSE_HEADER_END@ */ -#ifndef _APPLEINTELCLOCK_H -#define _APPLEINTELCLOCK_H +#ifndef _I386_LOWMEM_H_ +#define _I386_LOWMEM_H_ -#include -class AppleIntelClock : public IOService -{ - OSDeclareDefaultStructors(AppleIntelClock); +#ifdef __APPLE_API_PRIVATE -public: - virtual bool start(IOService * provider); -}; +#define I386_LOWMEM_RESERVED 0x18 -#endif /* _APPLEINTELCLOCK_H */ +#define I386_HIB_PAGETABLE 0x13 +#define I386_HIB_PAGETABLE_COUNT 5 + +#endif /* __APPLE_API_PRIVATE */ + +#endif /* !_I386_LOWMEM_H_ */ diff --git a/osfmk/i386/i386_vm_init.c b/osfmk/i386/i386_vm_init.c index e419a5374..aa119cda4 100644 --- a/osfmk/i386/i386_vm_init.c +++ b/osfmk/i386/i386_vm_init.c @@ -50,7 +50,6 @@ #include #include -#include #include @@ -70,32 +69,38 @@ #include #include #include -#include #include #include #include -#ifdef __MACHO__ #include -#endif +#include +#include "i386_lowmem.h" vm_size_t mem_size = 0; vm_offset_t first_avail = 0;/* first after page tables */ vm_offset_t last_addr; -uint64_t max_mem; -uint64_t sane_size = 0; /* we are going to use the booter memory - table info to construct this */ +uint64_t max_mem; /* Size of physical memory (bytes), adjusted by maxmem */ +uint64_t mem_actual; +uint64_t sane_size = 0; /* Memory size to use for defaults calculations */ + +#define MAXBOUNCEPOOL (128 * 1024 * 1024) +#define MAXLORESERVE ( 32 * 1024 * 1024) + +extern int bsd_mbuf_cluster_reserve(void); + + +uint32_t bounce_pool_base = 0; +uint32_t bounce_pool_size = 0; -pmap_paddr_t avail_start, avail_end; +static void reserve_bouncepool(uint32_t); + + +pmap_paddr_t avail_start, avail_end; vm_offset_t virtual_avail, virtual_end; -pmap_paddr_t avail_remaining; +static pmap_paddr_t avail_remaining; vm_offset_t static_memory_end = 0; -#ifndef __MACHO__ -extern char 
edata, end; -#endif - -#ifdef __MACHO__ #include vm_offset_t edata, etext, end; @@ -112,22 +117,55 @@ void *sectPRELINKB; int sectSizePRELINK; void *sectHIBB; int sectSizeHIB; extern void *getsegdatafromheader(struct mach_header *, const char *, int *); -#endif +extern struct segment_command *getsegbyname(const char *); +extern struct section *firstsect(struct segment_command *); +extern struct section *nextsect(struct segment_command *, struct section *); + + +void +i386_macho_zerofill(void) +{ + struct segment_command *sgp; + struct section *sp; + + sgp = getsegbyname("__DATA"); + if (sgp) { + sp = firstsect(sgp); + if (sp) { + do { + if ((sp->flags & S_ZEROFILL)) + bzero((char *) sp->addr, sp->size); + } while ((sp = nextsect(sgp, sp))); + } + } + + return; +} /* * Basic VM initialization. */ void -i386_vm_init(unsigned int maxmem, KernelBootArgs_t *args) +i386_vm_init(uint64_t maxmem, + boolean_t IA32e, + boot_args *args) { pmap_memory_region_t *pmptr; - MemoryRange *mptr; + pmap_memory_region_t *prev_pmptr; + EfiMemoryRange *mptr; + unsigned int mcount; + unsigned int msize; ppnum_t fap; unsigned int i; - ppnum_t maxpg = (maxmem >> I386_PGSHIFT); + unsigned int safeboot; + ppnum_t maxpg = 0; + uint32_t pmap_type; + uint32_t maxbouncepoolsize; + uint32_t maxloreserve; + uint32_t maxdmaaddr; -#ifdef __MACHO__ - /* Now retrieve addresses for end, edata, and etext + /* + * Now retrieve addresses for end, edata, and etext * from MACH-O headers. */ @@ -146,279 +184,337 @@ i386_vm_init(unsigned int maxmem, KernelBootArgs_t *args) etext = (vm_offset_t) sectTEXTB + sectSizeTEXT; edata = (vm_offset_t) sectDATAB + sectSizeDATA; -#endif -#ifndef __MACHO__ - /* - * Zero the BSS. - */ - - bzero((char *)&edata,(unsigned)(&end - &edata)); -#endif - - /* - * Initialize the pic prior to any possible call to an spl. - */ - set_cpu_model(); + cpuid_set_info(); vm_set_page_size(); /* * Compute the memory size. 
*/ + if ((1 == vm_himemory_mode) || PE_parse_boot_arg("-x", &safeboot)) { + maxpg = 1 << (32 - I386_PGSHIFT); + } avail_remaining = 0; avail_end = 0; pmptr = pmap_memory_regions; + prev_pmptr = 0; pmap_memory_region_count = pmap_memory_region_current = 0; fap = (ppnum_t) i386_btop(first_avail); - mptr = args->memoryMap; -#ifdef PAE -#define FOURGIG 0x0000000100000000ULL - for (i=0; i < args->memoryMapCount; i++,mptr++) { - ppnum_t base, top; - - base = (ppnum_t) (mptr->base >> I386_PGSHIFT); - top = (ppnum_t) ((mptr->base + mptr->length) >> I386_PGSHIFT) - 1; - - if (maxmem) { - if (base >= maxpg) break; - top = (top > maxpg)? maxpg : top; - } - - if (kMemoryRangeUsable != mptr->type) continue; - sane_size += (uint64_t)(mptr->length); -#ifdef DEVICES_HANDLE_64BIT_IO /* XXX enable else clause when I/O to high memory works */ - if (top < fap) { - /* entire range below first_avail */ - continue; - } else if (mptr->base >= FOURGIG) { - /* entire range above 4GB (pre PAE) */ - continue; - } else if ( (base < fap) && - (top > fap)) { - /* spans first_avail */ - /* put mem below first avail in table but - mark already allocated */ - pmptr->base = base; - pmptr->alloc = pmptr->end = (fap - 1); - pmptr->type = mptr->type; - /* we bump these here inline so the accounting below works - correctly */ - pmptr++; - pmap_memory_region_count++; - pmptr->alloc = pmptr->base = fap; - pmptr->type = mptr->type; - pmptr->end = top; - } else if ( (mptr->base < FOURGIG) && - ((mptr->base+mptr->length) > FOURGIG) ) { - /* spans across 4GB (pre PAE) */ - pmptr->alloc = pmptr->base = base; - pmptr->type = mptr->type; - pmptr->end = (FOURGIG >> I386_PGSHIFT) - 1; - } else { - /* entire range useable */ - pmptr->alloc = pmptr->base = base; - pmptr->type = mptr->type; - pmptr->end = top; - } -#else - if (top < fap) { - /* entire range below first_avail */ - continue; - } else if ( (base < fap) && - (top > fap)) { - /* spans first_avail */ - pmptr->alloc = pmptr->base = fap; - pmptr->type = 
mptr->type; - pmptr->end = top; - } else { - /* entire range useable */ - pmptr->alloc = pmptr->base = base; - pmptr->type = mptr->type; - pmptr->end = top; - } -#endif - if (i386_ptob(pmptr->end) > avail_end ) { - avail_end = i386_ptob(pmptr->end); - } - avail_remaining += (pmptr->end - pmptr->base); - pmap_memory_region_count++; - pmptr++; - } -#else /* non PAE follows */ + mptr = (EfiMemoryRange *)args->MemoryMap; + if (args->MemoryMapDescriptorSize == 0) + panic("Invalid memory map descriptor size"); + msize = args->MemoryMapDescriptorSize; + mcount = args->MemoryMapSize / msize; + #define FOURGIG 0x0000000100000000ULL - for (i=0; i < args->memoryMapCount; i++,mptr++) { - ppnum_t base, top; - - base = (ppnum_t) (mptr->base >> I386_PGSHIFT); - top = (ppnum_t) ((mptr->base + mptr->length) >> I386_PGSHIFT) - 1; - - if (maxmem) { - if (base >= maxpg) break; - top = (top > maxpg)? maxpg : top; - } - - if (kMemoryRangeUsable != mptr->type) continue; - - // save other regions - if (kMemoryRangeNVS == mptr->type) { - // Mark this as a memory range (for hibernation), - // but don't count as usable memory - pmptr->base = base; - pmptr->end = ((mptr->base + mptr->length + I386_PGBYTES - 1) >> I386_PGSHIFT) - 1; - pmptr->alloc = pmptr->end; - pmptr->type = mptr->type; - kprintf("NVS region: 0x%x ->0x%x\n", pmptr->base, pmptr->end); - } else if (kMemoryRangeUsable != mptr->type) { - continue; - } else { - // Usable memory region - sane_size += (uint64_t)(mptr->length); - if (top < fap) { - /* entire range below first_avail */ - /* salvage some low memory pages */ - /* we use some very low memory at startup */ - /* mark as already allocated here */ - pmptr->base = 0x18; /* PAE and HIB use below this */ - pmptr->alloc = pmptr->end = top; /* mark as already mapped */ - pmptr->type = mptr->type; - } else if (mptr->base >= FOURGIG) { - /* entire range above 4GB (pre PAE) */ - continue; - } else if ( (base < fap) && - (top > fap)) { - /* spans first_avail */ - /* put mem below 
first avail in table but - mark already allocated */ - pmptr->base = base; - pmptr->alloc = pmptr->end = (fap - 1); - pmptr->type = mptr->type; - /* we bump these here inline so the accounting below works - correctly */ - pmptr++; - pmap_memory_region_count++; - pmptr->alloc = pmptr->base = fap; - pmptr->type = mptr->type; - pmptr->end = top; - } else if ( (mptr->base < FOURGIG) && - ((mptr->base+mptr->length) > FOURGIG) ) { - /* spans across 4GB (pre PAE) */ - pmptr->alloc = pmptr->base = base; - pmptr->type = mptr->type; - pmptr->end = (FOURGIG >> I386_PGSHIFT) - 1; - } else { - /* entire range useable */ - pmptr->alloc = pmptr->base = base; - pmptr->type = mptr->type; - pmptr->end = top; - } - - if (i386_ptob(pmptr->end) > avail_end ) { - avail_end = i386_ptob(pmptr->end); - } - - avail_remaining += (pmptr->end - pmptr->base); - pmap_memory_region_count++; - pmptr++; - } + + for (i = 0; i < mcount; i++, mptr = (EfiMemoryRange *)(((vm_offset_t)mptr) + msize)) { + ppnum_t base, top; + + if (pmap_memory_region_count >= PMAP_MEMORY_REGIONS_SIZE) { + kprintf("WARNING: truncating memory region count at %d\n", pmap_memory_region_count); + break; + } + base = (ppnum_t) (mptr->PhysicalStart >> I386_PGSHIFT); + top = (ppnum_t) ((mptr->PhysicalStart) >> I386_PGSHIFT) + mptr->NumberOfPages - 1; + + switch (mptr->Type) { + case kEfiLoaderCode: + case kEfiLoaderData: + case kEfiBootServicesCode: + case kEfiBootServicesData: + case kEfiConventionalMemory: + /* + * Consolidate usable memory types into one. 
+ */ + pmap_type = kEfiConventionalMemory; + sane_size += (uint64_t)(mptr->NumberOfPages << I386_PGSHIFT); + break; + + case kEfiRuntimeServicesCode: + case kEfiRuntimeServicesData: + case kEfiACPIReclaimMemory: + case kEfiACPIMemoryNVS: + case kEfiPalCode: + /* + * sane_size should reflect the total amount of physical ram + * in the system, not just the amount that is available for + * the OS to use + */ + sane_size += (uint64_t)(mptr->NumberOfPages << I386_PGSHIFT); + /* fall thru */ + + case kEfiUnusableMemory: + case kEfiMemoryMappedIO: + case kEfiMemoryMappedIOPortSpace: + case kEfiReservedMemoryType: + default: + pmap_type = mptr->Type; + } + + kprintf("EFI region: type = %d/%d, base = 0x%x, top = 0x%x\n", mptr->Type, pmap_type, base, top); + + if (maxpg) { + if (base >= maxpg) + break; + top = (top > maxpg) ? maxpg : top; + } + + /* + * handle each region + */ + if (kEfiACPIMemoryNVS == pmap_type) { + prev_pmptr = 0; + continue; + } else if ((mptr->Attribute & EFI_MEMORY_RUNTIME) == EFI_MEMORY_RUNTIME || + pmap_type != kEfiConventionalMemory) { + prev_pmptr = 0; + continue; + } else { + /* + * Usable memory region + */ + if (top < I386_LOWMEM_RESERVED) { + prev_pmptr = 0; + continue; + } + if (top < fap) { + /* + * entire range below first_avail + * salvage some low memory pages + * we use some very low memory at startup + * mark as already allocated here + */ + if (base >= I386_LOWMEM_RESERVED) + pmptr->base = base; + else + pmptr->base = I386_LOWMEM_RESERVED; + /* + * mark as already mapped + */ + pmptr->alloc = pmptr->end = top; + pmptr->type = pmap_type; + } + else if ( (base < fap) && (top > fap) ) { + /* + * spans first_avail + * put mem below first avail in table but + * mark already allocated + */ + pmptr->base = base; + pmptr->alloc = pmptr->end = (fap - 1); + pmptr->type = pmap_type; + /* + * we bump these here inline so the accounting + * below works correctly + */ + pmptr++; + pmap_memory_region_count++; + pmptr->alloc = pmptr->base = fap; + 
pmptr->type = pmap_type; + pmptr->end = top; + } + else { + /* + * entire range useable + */ + pmptr->alloc = pmptr->base = base; + pmptr->type = pmap_type; + pmptr->end = top; + } + + if (i386_ptob(pmptr->end) > avail_end ) + avail_end = i386_ptob(pmptr->end); + + avail_remaining += (pmptr->end - pmptr->base); + + /* + * Consolidate contiguous memory regions, if possible + */ + if (prev_pmptr && + pmptr->type == prev_pmptr->type && + pmptr->base == pmptr->alloc && + pmptr->base == (prev_pmptr->end + 1)) { + prev_pmptr->end = pmptr->end; + } else { + pmap_memory_region_count++; + prev_pmptr = pmptr; + pmptr++; + } + } } -#endif + #ifdef PRINT_PMAP_MEMORY_TABLE - { - unsigned int j; - pmap_memory_region_t *p = pmap_memory_regions; - for (j=0;jbase, p->alloc, p->end); - } - } + { + unsigned int j; + pmap_memory_region_t *p = pmap_memory_regions; + vm_offset_t region_start, region_end; + vm_offset_t efi_start, efi_end; + for (j=0;jtype, + p->base << I386_PGSHIFT, p->alloc << I386_PGSHIFT, p->end << I386_PGSHIFT); + region_start = p->base << I386_PGSHIFT; + region_end = (p->end << I386_PGSHIFT) - 1; + mptr = args->MemoryMap; + for (i=0; iType != kEfiLoaderCode && + mptr->Type != kEfiLoaderData && + mptr->Type != kEfiBootServicesCode && + mptr->Type != kEfiBootServicesData && + mptr->Type != kEfiConventionalMemory) { + efi_start = (vm_offset_t)mptr->PhysicalStart; + efi_end = efi_start + ((vm_offset_t)mptr->NumberOfPages << I386_PGSHIFT) - 1; + if ((efi_start >= region_start && efi_start <= region_end) || + (efi_end >= region_start && efi_end <= region_end)) { + kprintf(" *** Overlapping region with EFI runtime region %d\n", i); + } + } + + } + } + } #endif avail_start = first_avail; + mem_actual = sane_size; - if (maxmem) { /* if user set maxmem try to use it */ - uint64_t tmp = (uint64_t)maxmem; - /* can't set below first_avail or above actual memory */ - if ( (maxmem > first_avail) && (tmp < sane_size) ) { - sane_size = tmp; - avail_end = maxmem; - } +#define MEG 
(1024*1024) + + /* + * For user visible memory size, round up to 128 Mb - accounting for the various stolen memory + * not reported by EFI. + */ + + sane_size = (sane_size + 128 * MEG - 1) & ~((uint64_t)(128 * MEG - 1)); + + /* + * if user set maxmem, reduce memory sizes + */ + if ( (maxmem > (uint64_t)first_avail) && (maxmem < sane_size)) { + ppnum_t discarded_pages = (sane_size - maxmem) >> I386_PGSHIFT; + sane_size = maxmem; + if (avail_remaining > discarded_pages) + avail_remaining -= discarded_pages; + else + avail_remaining = 0; } - // round up to a megabyte - mostly accounting for the - // low mem madness - sane_size += ( 0x100000ULL - 1); - sane_size &= ~0xFFFFFULL; - -#ifndef PAE - if (sane_size < FOURGIG) - mem_size = (unsigned long) sane_size; - else - mem_size = (unsigned long) (FOURGIG >> 1); -#else - mem_size = (unsigned long) sane_size; -#endif + /* + * mem_size is only a 32 bit container... follow the PPC route + * and pin it to a 2 Gbyte maximum + */ + if (sane_size > (FOURGIG >> 1)) + mem_size = (vm_size_t)(FOURGIG >> 1); + else + mem_size = (vm_size_t)sane_size; max_mem = sane_size; - /* now make sane size sane */ -#define MIN(a,b) (((a)<(b))?(a):(b)) -#define MEG (1024*1024) - sane_size = MIN(sane_size, 256*MEG); + kprintf("Physical memory %d MB\n", sane_size/MEG); - kprintf("Physical memory %d MB\n", - mem_size/MEG); + if (!PE_parse_boot_arg("max_valid_dma_addr", &maxdmaaddr)) + max_valid_dma_address = 1024ULL * 1024ULL * 4096ULL; + else + max_valid_dma_address = ((uint64_t) maxdmaaddr) * 1024ULL * 1024ULL; + + if (!PE_parse_boot_arg("maxbouncepool", &maxbouncepoolsize)) + maxbouncepoolsize = MAXBOUNCEPOOL; + else + maxbouncepoolsize = maxbouncepoolsize * (1024 * 1024); /* - * Initialize kernel physical map. - * Kernel virtual address starts at VM_KERNEL_MIN_ADDRESS. 
+ * bsd_mbuf_cluster_reserve depends on sane_size being set + * in order to correctly determine the size of the mbuf pool + * that will be reserved */ - pmap_bootstrap(0); + if (!PE_parse_boot_arg("maxloreserve", &maxloreserve)) + maxloreserve = MAXLORESERVE + bsd_mbuf_cluster_reserve(); + else + maxloreserve = maxloreserve * (1024 * 1024); + + if (avail_end >= max_valid_dma_address) { + if (maxbouncepoolsize) + reserve_bouncepool(maxbouncepoolsize); + + if (maxloreserve) + vm_lopage_poolsize = maxloreserve / PAGE_SIZE; + } + /* + * Initialize kernel physical map. + * Kernel virtual address starts at VM_KERNEL_MIN_ADDRESS. + */ + pmap_bootstrap(0, IA32e); } + unsigned int pmap_free_pages(void) { return avail_remaining; } + boolean_t pmap_next_page( ppnum_t *pn) { - - while (pmap_memory_region_current < pmap_memory_region_count) { - if (pmap_memory_regions[pmap_memory_region_current].alloc == - pmap_memory_regions[pmap_memory_region_current].end) { - pmap_memory_region_current++; - continue; - } - *pn = pmap_memory_regions[pmap_memory_region_current].alloc++; - avail_remaining--; - - return TRUE; + + if (avail_remaining) while (pmap_memory_region_current < pmap_memory_region_count) { + if (pmap_memory_regions[pmap_memory_region_current].alloc == + pmap_memory_regions[pmap_memory_region_current].end) { + pmap_memory_region_current++; + continue; + } + *pn = pmap_memory_regions[pmap_memory_region_current].alloc++; + avail_remaining--; + + return TRUE; } return FALSE; } + boolean_t pmap_valid_page( ppnum_t pn) { - unsigned int i; - pmap_memory_region_t *pmptr = pmap_memory_regions; - - assert(pn); - for (i=0; i= pmptr->base) && (pn <= pmptr->end) ) { - if (pmptr->type == kMemoryRangeUsable) - return TRUE; - else - return FALSE; - } - } - return FALSE; + unsigned int i; + pmap_memory_region_t *pmptr = pmap_memory_regions; + + assert(pn); + for (i = 0; i < pmap_memory_region_count; i++, pmptr++) { + if ( (pn >= pmptr->base) && (pn <= pmptr->end) && pmptr->type == 
kEfiConventionalMemory ) + return TRUE; + } + return FALSE; +} + + +static void +reserve_bouncepool(uint32_t bounce_pool_wanted) +{ + pmap_memory_region_t *pmptr = pmap_memory_regions; + pmap_memory_region_t *lowest = NULL; + unsigned int i; + unsigned int pages_needed; + + pages_needed = bounce_pool_wanted / PAGE_SIZE; + + for (i = 0; i < pmap_memory_region_count; i++, pmptr++) { + if ( (pmptr->type == kEfiConventionalMemory) && ((pmptr->end - pmptr->alloc) >= pages_needed) ) { + if ( (lowest == NULL) || (pmptr->alloc < lowest->alloc) ) + lowest = pmptr; + } + } + if ( (lowest != NULL) ) { + bounce_pool_base = lowest->alloc * PAGE_SIZE; + bounce_pool_size = bounce_pool_wanted; + + lowest->alloc += pages_needed; + avail_remaining -= pages_needed; + } } diff --git a/osfmk/i386/idt.s b/osfmk/i386/idt.s index e68a8f4fd..ea73007fd 100644 --- a/osfmk/i386/idt.s +++ b/osfmk/i386/idt.s @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -52,84 +52,131 @@ #include #include #include +#include +#include + +#define HI_DATA(lo_addr) ( (EXT(lo_addr) - EXT(hi_remap_data)) + HIGH_IDT_BASE ) +#define HI_TEXT(lo_text) ( (EXT(lo_text) - EXT(hi_remap_text)) + HIGH_MEM_BASE ) /* * Interrupt descriptor table and code vectors for it. */ #define IDT_BASE_ENTRY(vec,seg,type) \ .data ;\ - .long vec ;\ + .long EXT(vec) - EXT(hi_remap_text) + HIGH_MEM_BASE ;\ + .word seg ;\ + .byte 0 ;\ + .byte type ;\ + .text + +#define IDT_BASE_ENTRY_INT(vec,seg,type) \ + .data ;\ + .long vec - EXT(hi_remap_text) + HIGH_MEM_BASE ;\ + .word seg ;\ + .byte 0 ;\ + .byte type ;\ + .text + +#define IDT_BASE_ENTRY_TG(vec,seg,type) \ + .data ;\ + .long 0 ;\ .word seg ;\ .byte 0 ;\ .byte type ;\ .text #define IDT_ENTRY(vec,type) IDT_BASE_ENTRY(vec,KERNEL_CS,type) +#define IDT_ENTRY_INT(vec,type) IDT_BASE_ENTRY_INT(vec,KERNEL_CS,type) /* * No error code. 
Clear error code and push trap number. */ #define EXCEPTION(n,name) \ - IDT_ENTRY(EXT(name),K_TRAP_GATE);\ + IDT_ENTRY(name,K_INTR_GATE);\ Entry(name) ;\ pushl $0 ;\ pushl $(n) ;\ - jmp EXT(alltraps) + pusha ;\ + movl $ EXT(lo_alltraps),%ebx ;\ + jmp enter_lohandler + /* * Interrupt from user. Clear error code and push trap number. */ #define EXCEP_USR(n,name) \ - IDT_ENTRY(EXT(name),U_TRAP_GATE);\ + IDT_ENTRY(name,U_INTR_GATE);\ Entry(name) ;\ pushl $0 ;\ pushl $(n) ;\ - jmp EXT(alltraps) + pusha ;\ + movl $ EXT(lo_alltraps),%ebx ;\ + jmp enter_lohandler + /* * Special interrupt code. */ #define EXCEP_SPC(n,name) \ - IDT_ENTRY(EXT(name),K_TRAP_GATE) - + IDT_ENTRY(name,K_INTR_GATE) + /* * Special interrupt code from user. */ #define EXCEP_SPC_USR(n,name) \ - IDT_ENTRY(EXT(name),U_TRAP_GATE) + IDT_ENTRY(name,U_INTR_GATE) + /* * Extra-special interrupt code. Note that no offset may be * specified in a task gate descriptor, so name is ignored. */ #define EXCEP_TASK(n,name) \ - IDT_BASE_ENTRY(0,DEBUG_TSS,K_TASK_GATE) + IDT_BASE_ENTRY_TG(0,DEBUG_TSS,K_TASK_GATE) + +/* Double-fault fatal handler */ +#define DF_FATAL_TASK(n,name) \ + IDT_BASE_ENTRY_TG(0,DF_TSS,K_TASK_GATE) + +/* machine-check handler */ +#define MC_FATAL_TASK(n,name) \ + IDT_BASE_ENTRY_TG(0,MC_TSS,K_TASK_GATE) /* * Error code has been pushed. Push trap number. */ #define EXCEP_ERR(n,name) \ - IDT_ENTRY(EXT(name),K_TRAP_GATE);\ + IDT_ENTRY(name,K_INTR_GATE);\ Entry(name) ;\ pushl $(n) ;\ - jmp EXT(alltraps) + pusha ;\ + movl $ EXT(lo_alltraps),%ebx ;\ + jmp enter_lohandler + /* * Interrupt. 
*/ #define INTERRUPT(n) \ - IDT_ENTRY(0f,K_INTR_GATE) ;\ -0: ;\ - pushl %eax ;\ - movl $(n),%eax ;\ - jmp EXT(all_intrs) + IDT_ENTRY_INT(L_ ## n,K_INTR_GATE) ;\ + .align FALIGN ;\ +L_ ## n: ;\ + pushl $0 ;\ + pushl $(n) ;\ + pusha ;\ + movl $ EXT(lo_allintrs),%ebx ;\ + jmp enter_lohandler + .data -Entry(idt) + .align 12 +Entry(master_idt) +Entry(hi_remap_data) .text +Entry(hi_remap_text) EXCEPTION(0x00,t_zero_div) -EXCEP_SPC(0x01,t_debug) +EXCEP_SPC(0x01,hi_debug) INTERRUPT(0x02) /* NMI */ EXCEP_USR(0x03,t_int3) EXCEP_USR(0x04,t_into) @@ -139,23 +186,23 @@ EXCEPTION(0x07,t_nofpu) #if MACH_KDB EXCEP_TASK(0x08,db_task_dbl_fault) #else -EXCEPTION(0x08,a_dbl_fault) +DF_FATAL_TASK(0x08,df_task_start) #endif EXCEPTION(0x09,a_fpu_over) EXCEPTION(0x0a,a_inv_tss) -EXCEP_SPC(0x0b,t_segnp) +EXCEP_SPC(0x0b,hi_segnp) #if MACH_KDB EXCEP_TASK(0x0c,db_task_stk_fault) #else EXCEP_ERR(0x0c,t_stack_fault) #endif -EXCEP_SPC(0x0d,t_gen_prot) -EXCEP_SPC(0x0e,t_page_fault) +EXCEP_SPC(0x0d,hi_gen_prot) +EXCEP_SPC(0x0e,hi_page_fault) EXCEPTION(0x0f,t_trap_0f) EXCEPTION(0x10,t_fpu_err) EXCEPTION(0x11,t_trap_11) -EXCEPTION(0x12,t_trap_12) -EXCEPTION(0x13,t_trap_13) +MC_FATAL_TASK(0x12,mc_task_start) +EXCEPTION(0x13,t_sse_err) EXCEPTION(0x14,t_trap_14) EXCEPTION(0x15,t_trap_15) EXCEPTION(0x16,t_trap_16) @@ -271,10 +318,11 @@ INTERRUPT(0x7d) INTERRUPT(0x7e) INTERRUPT(0x7f) -EXCEP_SPC_USR(0x80,syscall_int80) -INTERRUPT(0x81) -INTERRUPT(0x82) -INTERRUPT(0x83) +EXCEP_SPC_USR(0x80,hi_unix_scall) +EXCEP_SPC_USR(0x81,hi_mach_scall) +EXCEP_SPC_USR(0x82,hi_mdep_scall) +EXCEP_SPC_USR(0x83,hi_diag_scall) + INTERRUPT(0x84) INTERRUPT(0x85) INTERRUPT(0x86) @@ -407,3 +455,399 @@ INTERRUPT(0xfd) INTERRUPT(0xfe) EXCEPTION(0xff,t_preempt) + + .data +Entry(lo_kernel_cr3) + .long 0 + .long 0 + + .text + + +/******************************************************************************************************* + * + * Trap/interrupt entry points. 
+ * + * All traps must create the following save area on the PCB "stack": + * + * gs + * fs + * es + * ds + * edi + * esi + * ebp + * cr2 if page fault - otherwise unused + * ebx + * edx + * ecx + * eax + * trap number + * error code + * eip + * cs + * eflags + * user esp - if from user + * user ss - if from user + */ + + +Entry(hi_ret_to_user) + movl %esp,%ebx + movl %gs:CPU_ACTIVE_THREAD,%ecx + subl ACT_PCB_ISS(%ecx),%ebx + movl $(WINDOWS_CLEAN),ACT_COPYIO_STATE(%ecx) + + movl ACT_PCB_IDS(%ecx),%eax /* get debug state struct */ + cmpl $0,%eax /* is there a debug state */ + je 1f /* branch if not */ + movl DS_DR0(%eax), %ecx /* Load the 32 bit debug registers */ + movl %ecx, %db0 + movl DS_DR1(%eax), %ecx + movl %ecx, %db1 + movl DS_DR2(%eax), %ecx + movl %ecx, %db2 + movl DS_DR3(%eax), %ecx + movl %ecx, %db3 + movl DS_DR7(%eax), %eax +1: + addl %gs:CPU_HI_ISS,%ebx /* rebase PCB save area to high addr */ + movl %gs:CPU_TASK_CR3,%ecx + movl %ecx,%gs:CPU_ACTIVE_CR3 + movl %ebx,%esp /* switch to hi based PCB stack */ + movl %ecx,%cr3 /* switch to user's address space */ + + cmpl $0,%eax /* is dr7 set to something? 
*/ + je 2f /* branch if not */ + movl %eax,%db7 /* Set dr7 */ +2: + +Entry(hi_ret_to_kernel) + + popl %eax /* ignore flavor of saved state */ +EXT(ret_popl_gs): + popl %gs /* restore segment registers */ +EXT(ret_popl_fs): + popl %fs +EXT(ret_popl_es): + popl %es +EXT(ret_popl_ds): + popl %ds + + popa /* restore general registers */ + addl $8,%esp /* discard trap number and error code */ + + cmpl $(SYSENTER_CS),4(%esp) /* test for fast entry/exit */ + je fast_exit +EXT(ret_iret): + iret /* return from interrupt */ +fast_exit: + popl %edx /* user return eip */ + popl %ecx /* pop and toss cs */ + andl $(~EFL_IF),(%esp) /* clear intrs enabled, see sti below */ + popf /* flags - carry denotes failure */ + popl %ecx /* user return esp */ + sti /* interrupts enabled after sysexit */ + sysexit + +/*******************************************************************************************************/ + + +Entry(hi_unix_scall) + pushl %eax /* save system call number */ + pushl $0 /* clear trap number slot */ + pusha /* save the general registers */ + movl $ EXT(lo_unix_scall),%ebx + jmp enter_lohandler + + +Entry(hi_mach_scall) + pushl %eax /* save system call number */ + pushl $0 /* clear trap number slot */ + pusha /* save the general registers */ + movl $ EXT(lo_mach_scall),%ebx + jmp enter_lohandler + + +Entry(hi_mdep_scall) + pushl %eax /* save system call number */ + pushl $0 /* clear trap number slot */ + pusha /* save the general registers */ + movl $ EXT(lo_mdep_scall),%ebx + jmp enter_lohandler + + +Entry(hi_diag_scall) + pushl %eax // Save sselector + pushl $0 // Clear trap number slot + pusha // save the general registers + movl $EXT(lo_diag_scall),%ebx // Get the function down low to transfer to + jmp enter_lohandler // Leap to it... 
+ + +/* + * sysenter entry point + * Requires user code to set up: + * edx: user instruction pointer (return address) + * ecx: user stack pointer + * on which is pushed stub ret addr and saved ebx + * Return to user-space is made using sysexit. + * Note: sysenter/sysexit cannot be used for calls returning a value in edx, + * or requiring ecx to be preserved. + */ +Entry(hi_sysenter) + movl (%esp), %esp /* switch from intr stack to pcb */ + /* + * Push values on to the PCB stack + * to cons up the saved state. + */ + pushl $(USER_DS) /* ss */ + pushl %ecx /* uesp */ + pushf /* flags */ + /* + * Clear, among others, the Nested Task (NT) flags bit; + * This is cleared by INT, but not by sysenter, which only + * clears RF, VM and IF. + */ + pushl $0 + popfl + pushl $(SYSENTER_CS) /* cs */ +hi_sysenter_2: + pushl %edx /* eip */ + pushl %eax /* err/eax - syscall code */ + pushl $0 /* clear trap number slot */ + pusha /* save the general registers */ + orl $(EFL_IF),R_EFLAGS-R_EDI(%esp) /* (edi was last reg pushed) */ + movl $ EXT(lo_sysenter),%ebx +enter_lohandler: + pushl %ds + pushl %es + pushl %fs + pushl %gs +enter_lohandler1: + pushl $(SS_32) /* 32-bit state flavor */ + mov %ss,%eax + mov %eax,%ds + mov %eax,%fs + mov %eax,%es /* switch to kernel data seg */ + mov $(CPU_DATA_GS),%eax + mov %eax,%gs + cld /* clear direction flag */ + /* + * Switch to kernel's address space if necessary + */ + movl HI_DATA(lo_kernel_cr3),%ecx + movl %cr3,%eax + cmpl %eax,%ecx + je 1f + movl %ecx,%cr3 + movl %ecx,%gs:CPU_ACTIVE_CR3 +1: + testb $3,R_CS(%esp) + jz 2f + movl %esp,%edx /* came from user mode */ + subl %gs:CPU_HI_ISS,%edx + movl %gs:CPU_ACTIVE_THREAD,%ecx + addl ACT_PCB_ISS(%ecx),%edx /* rebase the high stack to a low address */ + movl %edx,%esp + cmpl $0, ACT_PCB_IDS(%ecx) /* Is there a debug register state? 
*/ + je 2f + movl $0, %ecx /* If so, reset DR7 (the control) */ + movl %ecx, %dr7 +2: + movl R_TRAPNO(%esp),%ecx // Get the interrupt vector + addl $1,%gs:hwIntCnt(,%ecx,4) // Bump the count + jmp *%ebx + + +/* + * Page fault traps save cr2. + */ +Entry(hi_page_fault) + pushl $(T_PAGE_FAULT) /* mark a page fault trap */ + pusha /* save the general registers */ + movl %cr2,%eax /* get the faulting address */ + movl %eax,R_CR2-R_EDI(%esp) /* save in esp save slot */ + + movl $ EXT(lo_alltraps),%ebx + jmp enter_lohandler + + + +/* + * Debug trap. Check for single-stepping across system call into + * kernel. If this is the case, taking the debug trap has turned + * off single-stepping - save the flags register with the trace + * bit set. + */ +Entry(hi_debug) + testb $3,4(%esp) + jnz hi_debug_trap + /* trap came from kernel mode */ + cmpl $(HI_TEXT(hi_mach_scall)),(%esp) + jne 6f + addl $12,%esp /* remove eip/cs/eflags from debug_trap */ + jmp EXT(hi_mach_scall) /* continue system call entry */ +6: + cmpl $(HI_TEXT(hi_mdep_scall)),(%esp) + jne 5f + addl $12,%esp /* remove eip/cs/eflags from debug_trap */ + jmp EXT(hi_mdep_scall) /* continue system call entry */ +5: + cmpl $(HI_TEXT(hi_unix_scall)),(%esp) + jne 4f + addl $12,%esp /* remove eip/cs/eflags from debug_trap */ + jmp EXT(hi_unix_scall) /* continue system call entry */ +4: + cmpl $(HI_TEXT(hi_sysenter)),(%esp) + jne hi_debug_trap + /* + * eip/cs/flags have been pushed on intr stack + * We have to switch to pcb stack and copy eflags. + * Note: setting the cs selector to SYSENTER_TF_CS + * will cause the return to user path to take the iret path so + * that eflags (containing the trap bit) is set atomically. + * In unix_syscall this is tested so that we'll rewind the pc + * to account for with sysenter or int entry. 
+ */ + addl $8,%esp /* remove eip/cs */ + pushl %ecx /* save %ecx */ + movl 8(%esp),%ecx /* top of intr stack -> pcb stack */ + xchgl %ecx,%esp /* switch to pcb stack */ + pushl $(USER_DS) /* ss */ + pushl %ss:(%ecx) /* %ecx into uesp slot */ + pushl %ss:4(%ecx) /* eflags */ + movl %ss:(%ecx),%ecx /* restore %ecx */ + pushl $(SYSENTER_TF_CS) /* cs - not SYSENTER_CS for iret path */ + jmp hi_sysenter_2 /* continue sysenter entry */ +hi_debug_trap: + pushl $0 + pushl $(T_DEBUG) /* handle as user trap */ + pusha /* save the general registers */ + movl $ EXT(lo_alltraps),%ebx + jmp enter_lohandler + + + +/* + * General protection or segment-not-present fault. + * Check for a GP/NP fault in the kernel_return + * sequence; if there, report it as a GP/NP fault on the user's instruction. + * + * esp-> 0: trap code (NP or GP) + * 4: segment number in error + * 8 eip + * 12 cs + * 16 eflags + * 20 old registers (trap is from kernel) + */ +Entry(hi_gen_prot) + pushl $(T_GENERAL_PROTECTION) /* indicate fault type */ + jmp trap_check_kernel_exit /* check for kernel exit sequence */ + +Entry(hi_segnp) + pushl $(T_SEGMENT_NOT_PRESENT) + /* indicate fault type */ +trap_check_kernel_exit: + testb $3,12(%esp) + jnz hi_take_trap + /* trap was from kernel mode, so */ + /* check for the kernel exit sequence */ + cmpl $(HI_TEXT(ret_iret)),8(%esp) /* on IRET? */ + je fault_iret + cmpl $(HI_TEXT(ret_popl_ds)),8(%esp) /* popping DS? */ + je fault_popl_ds + cmpl $(HI_TEXT(ret_popl_es)),8(%esp) /* popping ES? */ + je fault_popl_es + cmpl $(HI_TEXT(ret_popl_fs)),8(%esp) /* popping FS? */ + je fault_popl_fs + cmpl $(HI_TEXT(ret_popl_gs)),8(%esp) /* popping GS? */ + je fault_popl_gs +hi_take_trap: + pusha /* save the general registers */ + movl $ EXT(lo_alltraps),%ebx + jmp enter_lohandler + + +/* + * GP/NP fault on IRET: CS or SS is in error. + * All registers contain the user's values. 
+ * + * on SP is + * 0 trap number + * 4 errcode + * 8 eip + * 12 cs --> trapno + * 16 efl --> errcode + * 20 user eip + * 24 user cs + * 28 user eflags + * 32 user esp + * 36 user ss + */ +fault_iret: + movl %eax,8(%esp) /* save eax (we don`t need saved eip) */ + popl %eax /* get trap number */ + movl %eax,12-4(%esp) /* put in user trap number */ + popl %eax /* get error code */ + movl %eax,16-8(%esp) /* put in user errcode */ + popl %eax /* restore eax */ + /* now treat as fault from user */ + pusha /* save the general registers */ + movl $ EXT(lo_alltraps),%ebx + jmp enter_lohandler + +/* + * Fault restoring a segment register. The user's registers are still + * saved on the stack. The offending segment register has not been + * popped. + */ +fault_popl_ds: + popl %eax /* get trap number */ + popl %edx /* get error code */ + addl $12,%esp /* pop stack to user regs */ + jmp push_es /* (DS on top of stack) */ +fault_popl_es: + popl %eax /* get trap number */ + popl %edx /* get error code */ + addl $12,%esp /* pop stack to user regs */ + jmp push_fs /* (ES on top of stack) */ +fault_popl_fs: + popl %eax /* get trap number */ + popl %edx /* get error code */ + addl $12,%esp /* pop stack to user regs */ + jmp push_gs /* (FS on top of stack) */ +fault_popl_gs: + popl %eax /* get trap number */ + popl %edx /* get error code */ + addl $12,%esp /* pop stack to user regs */ + jmp push_none /* (GS on top of stack) */ + +push_es: + pushl %es /* restore es, */ +push_fs: + pushl %fs /* restore fs, */ +push_gs: + pushl %gs /* restore gs. 
*/ +push_none: + movl %eax,R_TRAPNO(%esp) /* set trap number */ + movl %edx,R_ERR(%esp) /* set error code */ + /* now treat as fault from user */ + /* except that segment registers are */ + /* already pushed */ + movl $ EXT(lo_alltraps),%ebx + jmp enter_lohandler1 + + + .text + + +Entry(lo_ret_to_user) + jmp *1f +1: .long HI_TEXT(hi_ret_to_user) + +Entry(lo_ret_to_kernel) + jmp *1f +1: .long HI_TEXT(hi_ret_to_kernel) + +Entry(hi_remap_etext) diff --git a/osfmk/i386/idt64.s b/osfmk/i386/idt64.s new file mode 100644 index 000000000..b79f8d7aa --- /dev/null +++ b/osfmk/i386/idt64.s @@ -0,0 +1,1144 @@ +/* + * Copyright (c) 2006 Apple Computer, Inc. All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * The contents of this file constitute Original Code as defined in and + * are subject to the Apple Public Source License Version 1.1 (the + * "License"). You may not use this file except in compliance with the + * License. Please obtain a copy of the License at + * http://www.apple.com/publicsource and read it before using this file. + * + * This Original Code and all software distributed under the License are + * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the + * License for the specific language governing rights and limitations + * under the License. + * + * @APPLE_LICENSE_HEADER_END@ + */ +#include +#include +#include +#include +#include +#include +#define _ARCH_I386_ASM_HELP_H_ /* Prevent inclusion of user header */ +#include +#include +#include + +/* + * Locore handlers. 
+ */ +#define LO_ALLINTRS EXT(lo_allintrs) +#define LO_ALLTRAPS EXT(lo_alltraps) +#define LO_SYSENTER EXT(lo_sysenter) +#define LO_SYSCALL EXT(lo_syscall) +#define LO_UNIX_SCALL EXT(lo_unix_scall) +#define LO_MACH_SCALL EXT(lo_mach_scall) +#define LO_MDEP_SCALL EXT(lo_mdep_scall) +#define LO_DIAG_SCALL EXT(lo_diag_scall) +#define LO_DOUBLE_FAULT EXT(lo_df64) +#define LO_MACHINE_CHECK EXT(lo_mc64) + +/* + * Interrupt descriptor table and code vectors for it. + * + * The IDT64_BASE_ENTRY macro lays down a fake descriptor that must be + * reformatted ("fixed") before use. + * All vectors are rebased in uber-space. + * Special vectors (e.g. double-fault) use a non-0 IST. + */ +#define IDT64_BASE_ENTRY(vec,seg,ist,type) \ + .data ;\ + .long vec ;\ + .long KERNEL_UBER_BASE_HI32 ;\ + .word seg ;\ + .byte ist*16 ;\ + .byte type ;\ + .long 0 ;\ + .text + +#define IDT64_ENTRY(vec,ist,type) \ + IDT64_BASE_ENTRY(EXT(vec),KERNEL64_CS,ist,type) +#define IDT64_ENTRY_LOCAL(vec,ist,type) \ + IDT64_BASE_ENTRY(vec,KERNEL64_CS,ist,type) + +/* + * Push trap number and address of compatibility mode handler, + * then branch to common trampoline. Error already pushed. + */ +#define EXCEP64_ERR(n,name) \ + IDT64_ENTRY(name,0,K_INTR_GATE) ;\ +Entry(name) ;\ + push $(n) ;\ + movl $(LO_ALLTRAPS), 4(%rsp) ;\ + jmp L_enter_lohandler + + +/* + * Push error(0), trap number and address of compatibility mode handler, + * then branch to common trampoline. + */ +#define EXCEPTION64(n,name) \ + IDT64_ENTRY(name,0,K_INTR_GATE) ;\ +Entry(name) ;\ + push $0 ;\ + push $(n) ;\ + movl $(LO_ALLTRAPS), 4(%rsp) ;\ + jmp L_enter_lohandler + + +/* + * Interrupt from user. + * Push error (0), trap number and address of compatibility mode handler, + * then branch to common trampoline. + */ +#define EXCEP64_USR(n,name) \ + IDT64_ENTRY(name,0,U_INTR_GATE) ;\ +Entry(name) ;\ + push $0 ;\ + push $(n) ;\ + movl $(LO_ALLTRAPS), 4(%rsp) ;\ + jmp L_enter_lohandler + + +/* + * Special interrupt code from user. 
+ */ +#define EXCEP64_SPC_USR(n,name) \ + IDT64_ENTRY(name,0,U_INTR_GATE) + + +/* + * Special interrupt code. + * In 64-bit mode we may use an IST slot instead of task gates. + */ +#define EXCEP64_IST(n,name,ist) \ + IDT64_ENTRY(name,ist,K_INTR_GATE) +#define EXCEP64_SPC(n,name) \ + IDT64_ENTRY(name,0,K_INTR_GATE) + + +/* + * Interrupt. + * Push zero err, interrupt vector and address of compatibility mode handler, + * then branch to common trampoline. + */ +#define INTERRUPT64(n) \ + IDT64_ENTRY_LOCAL(L_ ## n,0,K_INTR_GATE) ;\ + .align FALIGN ;\ +L_ ## n: ;\ + push $0 ;\ + push $(n) ;\ + movl $(LO_ALLINTRS), 4(%rsp) ;\ + jmp L_enter_lohandler + + + .data + .align 12 +Entry(master_idt64) +Entry(hi64_data_base) + .text + .code64 +Entry(hi64_text_base) + +EXCEPTION64(0x00,t64_zero_div) +EXCEP64_SPC(0x01,hi64_debug) +INTERRUPT64(0x02) /* NMI */ +EXCEP64_USR(0x03,t64_int3) +EXCEP64_USR(0x04,t64_into) +EXCEP64_USR(0x05,t64_bounds) +EXCEPTION64(0x06,t64_invop) +EXCEPTION64(0x07,t64_nofpu) +#if MACH_KDB +EXCEP64_IST(0x08,db_task_dbl_fault64,1) +#else +EXCEP64_IST(0x08,hi64_double_fault,1) +#endif +EXCEPTION64(0x09,a64_fpu_over) +EXCEPTION64(0x0a,a64_inv_tss) +EXCEP64_SPC(0x0b,hi64_segnp) +#if MACH_KDB +EXCEP64_IST(0x0c,db_task_stk_fault64,1) +#else +EXCEP64_IST(0x0c,hi64_stack_fault,1) +#endif +EXCEP64_SPC(0x0d,hi64_gen_prot) +EXCEP64_ERR(0x0e,t64_page_fault) +EXCEPTION64(0x0f,t64_trap_0f) +EXCEPTION64(0x10,t64_fpu_err) +EXCEPTION64(0x11,t64_trap_11) +EXCEP64_IST(0x12,mc64,1) +EXCEPTION64(0x13,t64_sse_err) +EXCEPTION64(0x14,t64_trap_14) +EXCEPTION64(0x15,t64_trap_15) +EXCEPTION64(0x16,t64_trap_16) +EXCEPTION64(0x17,t64_trap_17) +EXCEPTION64(0x18,t64_trap_18) +EXCEPTION64(0x19,t64_trap_19) +EXCEPTION64(0x1a,t64_trap_1a) +EXCEPTION64(0x1b,t64_trap_1b) +EXCEPTION64(0x1c,t64_trap_1c) +EXCEPTION64(0x1d,t64_trap_1d) +EXCEPTION64(0x1e,t64_trap_1e) +EXCEPTION64(0x1f,t64_trap_1f) + +INTERRUPT64(0x20) +INTERRUPT64(0x21) +INTERRUPT64(0x22) +INTERRUPT64(0x23) +INTERRUPT64(0x24) 
+INTERRUPT64(0x25) +INTERRUPT64(0x26) +INTERRUPT64(0x27) +INTERRUPT64(0x28) +INTERRUPT64(0x29) +INTERRUPT64(0x2a) +INTERRUPT64(0x2b) +INTERRUPT64(0x2c) +INTERRUPT64(0x2d) +INTERRUPT64(0x2e) +INTERRUPT64(0x2f) + +INTERRUPT64(0x30) +INTERRUPT64(0x31) +INTERRUPT64(0x32) +INTERRUPT64(0x33) +INTERRUPT64(0x34) +INTERRUPT64(0x35) +INTERRUPT64(0x36) +INTERRUPT64(0x37) +INTERRUPT64(0x38) +INTERRUPT64(0x39) +INTERRUPT64(0x3a) +INTERRUPT64(0x3b) +INTERRUPT64(0x3c) +INTERRUPT64(0x3d) +INTERRUPT64(0x3e) +INTERRUPT64(0x3f) + +INTERRUPT64(0x40) +INTERRUPT64(0x41) +INTERRUPT64(0x42) +INTERRUPT64(0x43) +INTERRUPT64(0x44) +INTERRUPT64(0x45) +INTERRUPT64(0x46) +INTERRUPT64(0x47) +INTERRUPT64(0x48) +INTERRUPT64(0x49) +INTERRUPT64(0x4a) +INTERRUPT64(0x4b) +INTERRUPT64(0x4c) +INTERRUPT64(0x4d) +INTERRUPT64(0x4e) +INTERRUPT64(0x4f) + +INTERRUPT64(0x50) +INTERRUPT64(0x51) +INTERRUPT64(0x52) +INTERRUPT64(0x53) +INTERRUPT64(0x54) +INTERRUPT64(0x55) +INTERRUPT64(0x56) +INTERRUPT64(0x57) +INTERRUPT64(0x58) +INTERRUPT64(0x59) +INTERRUPT64(0x5a) +INTERRUPT64(0x5b) +INTERRUPT64(0x5c) +INTERRUPT64(0x5d) +INTERRUPT64(0x5e) +INTERRUPT64(0x5f) + +INTERRUPT64(0x60) +INTERRUPT64(0x61) +INTERRUPT64(0x62) +INTERRUPT64(0x63) +INTERRUPT64(0x64) +INTERRUPT64(0x65) +INTERRUPT64(0x66) +INTERRUPT64(0x67) +INTERRUPT64(0x68) +INTERRUPT64(0x69) +INTERRUPT64(0x6a) +INTERRUPT64(0x6b) +INTERRUPT64(0x6c) +INTERRUPT64(0x6d) +INTERRUPT64(0x6e) +INTERRUPT64(0x6f) + +INTERRUPT64(0x70) +INTERRUPT64(0x71) +INTERRUPT64(0x72) +INTERRUPT64(0x73) +INTERRUPT64(0x74) +INTERRUPT64(0x75) +INTERRUPT64(0x76) +INTERRUPT64(0x77) +INTERRUPT64(0x78) +INTERRUPT64(0x79) +INTERRUPT64(0x7a) +INTERRUPT64(0x7b) +INTERRUPT64(0x7c) +INTERRUPT64(0x7d) +INTERRUPT64(0x7e) +INTERRUPT64(0x7f) + +EXCEP64_SPC_USR(0x80,hi64_unix_scall) +EXCEP64_SPC_USR(0x81,hi64_mach_scall) +EXCEP64_SPC_USR(0x82,hi64_mdep_scall) +EXCEP64_SPC_USR(0x83,hi64_diag_scall) + +INTERRUPT64(0x84) +INTERRUPT64(0x85) +INTERRUPT64(0x86) +INTERRUPT64(0x87) +INTERRUPT64(0x88) 
+INTERRUPT64(0x89) +INTERRUPT64(0x8a) +INTERRUPT64(0x8b) +INTERRUPT64(0x8c) +INTERRUPT64(0x8d) +INTERRUPT64(0x8e) +INTERRUPT64(0x8f) + +INTERRUPT64(0x90) +INTERRUPT64(0x91) +INTERRUPT64(0x92) +INTERRUPT64(0x93) +INTERRUPT64(0x94) +INTERRUPT64(0x95) +INTERRUPT64(0x96) +INTERRUPT64(0x97) +INTERRUPT64(0x98) +INTERRUPT64(0x99) +INTERRUPT64(0x9a) +INTERRUPT64(0x9b) +INTERRUPT64(0x9c) +INTERRUPT64(0x9d) +INTERRUPT64(0x9e) +INTERRUPT64(0x9f) + +INTERRUPT64(0xa0) +INTERRUPT64(0xa1) +INTERRUPT64(0xa2) +INTERRUPT64(0xa3) +INTERRUPT64(0xa4) +INTERRUPT64(0xa5) +INTERRUPT64(0xa6) +INTERRUPT64(0xa7) +INTERRUPT64(0xa8) +INTERRUPT64(0xa9) +INTERRUPT64(0xaa) +INTERRUPT64(0xab) +INTERRUPT64(0xac) +INTERRUPT64(0xad) +INTERRUPT64(0xae) +INTERRUPT64(0xaf) + +INTERRUPT64(0xb0) +INTERRUPT64(0xb1) +INTERRUPT64(0xb2) +INTERRUPT64(0xb3) +INTERRUPT64(0xb4) +INTERRUPT64(0xb5) +INTERRUPT64(0xb6) +INTERRUPT64(0xb7) +INTERRUPT64(0xb8) +INTERRUPT64(0xb9) +INTERRUPT64(0xba) +INTERRUPT64(0xbb) +INTERRUPT64(0xbc) +INTERRUPT64(0xbd) +INTERRUPT64(0xbe) +INTERRUPT64(0xbf) + +INTERRUPT64(0xc0) +INTERRUPT64(0xc1) +INTERRUPT64(0xc2) +INTERRUPT64(0xc3) +INTERRUPT64(0xc4) +INTERRUPT64(0xc5) +INTERRUPT64(0xc6) +INTERRUPT64(0xc7) +INTERRUPT64(0xc8) +INTERRUPT64(0xc9) +INTERRUPT64(0xca) +INTERRUPT64(0xcb) +INTERRUPT64(0xcc) +INTERRUPT64(0xcd) +INTERRUPT64(0xce) +INTERRUPT64(0xcf) + +INTERRUPT64(0xd0) +INTERRUPT64(0xd1) +INTERRUPT64(0xd2) +INTERRUPT64(0xd3) +INTERRUPT64(0xd4) +INTERRUPT64(0xd5) +INTERRUPT64(0xd6) +INTERRUPT64(0xd7) +INTERRUPT64(0xd8) +INTERRUPT64(0xd9) +INTERRUPT64(0xda) +INTERRUPT64(0xdb) +INTERRUPT64(0xdc) +INTERRUPT64(0xdd) +INTERRUPT64(0xde) +INTERRUPT64(0xdf) + +INTERRUPT64(0xe0) +INTERRUPT64(0xe1) +INTERRUPT64(0xe2) +INTERRUPT64(0xe3) +INTERRUPT64(0xe4) +INTERRUPT64(0xe5) +INTERRUPT64(0xe6) +INTERRUPT64(0xe7) +INTERRUPT64(0xe8) +INTERRUPT64(0xe9) +INTERRUPT64(0xea) +INTERRUPT64(0xeb) +INTERRUPT64(0xec) +INTERRUPT64(0xed) +INTERRUPT64(0xee) +INTERRUPT64(0xef) + +INTERRUPT64(0xf0) 
+INTERRUPT64(0xf1) +INTERRUPT64(0xf2) +INTERRUPT64(0xf3) +INTERRUPT64(0xf4) +INTERRUPT64(0xf5) +INTERRUPT64(0xf6) +INTERRUPT64(0xf7) +INTERRUPT64(0xf8) +INTERRUPT64(0xf9) +INTERRUPT64(0xfa) +INTERRUPT64(0xfb) +INTERRUPT64(0xfc) +INTERRUPT64(0xfd) +INTERRUPT64(0xfe) +EXCEPTION64(0xff,t64_preempt) + + + .text +/* + * + * Trap/interrupt entry points. + * + * All traps must create the following 32-bit save area on the PCB "stack" + * - this is identical to the legacy mode 32-bit case: + * + * gs + * fs + * es + * ds + * edi + * esi + * ebp + * cr2 (defined only for page fault) + * ebx + * edx + * ecx + * eax + * trap number + * error code + * eip + * cs + * eflags + * user esp - if from user + * user ss - if from user + * + * Above this is the trap number and compatibility mode handler address + * (packed into an 8-byte stack entry) and the 64-bit interrupt stack frame: + * + * (trapno, trapfn) + * err + * rip + * cs + * rflags + * rsp + * ss + * + */ + + .code32 +/* + * Control is passed here to return to the compatibility mode user. + * At this stage we're in kernel space in compatibility mode + * but we need to switch into 64-bit mode in the 4G-based trampoline + * space before performing the iret. + */ +Entry(lo64_ret_to_user) + movl %gs:CPU_ACTIVE_THREAD,%ecx + + movl ACT_PCB_IDS(%ecx),%eax /* Obtain this thread's debug state */ + cmpl $0,%eax /* Is there a debug register context? */ + je 2f /* branch if not */ + cmpl $(TASK_MAP_32BIT), %gs:CPU_TASK_MAP /* Are we a 64-bit task? 
*/ + jne 1f + movl DS_DR0(%eax), %ecx /* If not, load the 32 bit DRs */ + movl %ecx, %db0 + movl DS_DR1(%eax), %ecx + movl %ecx, %db1 + movl DS_DR2(%eax), %ecx + movl %ecx, %db2 + movl DS_DR3(%eax), %ecx + movl %ecx, %db3 + movl DS_DR7(%eax), %ecx + movl %ecx, %gs:CPU_DR7 + movl $0, %gs:CPU_DR7 + 4 + jmp 2f +1: + ENTER_64BIT_MODE() /* Enter long mode */ + mov DS64_DR0(%eax), %rcx /* Load the full width DRs*/ + mov %rcx, %dr0 + mov DS64_DR1(%eax), %rcx + mov %rcx, %dr1 + mov DS64_DR2(%eax), %rcx + mov %rcx, %dr2 + mov DS64_DR3(%eax), %rcx + mov %rcx, %dr3 + mov DS64_DR7(%eax), %rcx + mov %rcx, %gs:CPU_DR7 + jmp 3f /* Enter uberspace */ +2: + ENTER_64BIT_MODE() +3: + ENTER_UBERSPACE() + + /* + * Now switch %cr3, if necessary. + */ + swapgs /* switch back to uber-kernel gs base */ + mov %gs:CPU_TASK_CR3,%rcx + mov %rcx,%gs:CPU_ACTIVE_CR3 + mov %cr3, %rax + cmp %rcx, %rax + je 1f + /* flag the copyio engine state as WINDOWS_CLEAN */ + mov %gs:CPU_ACTIVE_THREAD,%eax + movl $(WINDOWS_CLEAN),ACT_COPYIO_STATE(%eax) + mov %rcx,%cr3 /* switch to user's address space */ +1: + + mov %gs:CPU_DR7, %rax /* Is there a debug control register?*/ + cmp $0, %rax + je 1f + mov %rax, %dr7 /* Set DR7 */ + movq $0, %gs:CPU_DR7 +1: + + /* + * Adjust stack to use uber-space. + */ + mov $(KERNEL_UBER_BASE_HI32), %rax + shl $32, %rsp + shrd $32, %rax, %rsp /* relocate into uber-space */ + + cmpl $(SS_32), SS_FLAVOR(%rsp) /* 32-bit state? */ + jne L_64bit_return + jmp L_32bit_return + +Entry(lo64_ret_to_kernel) + ENTER_64BIT_MODE() + ENTER_UBERSPACE() + + swapgs /* switch back to uber-kernel gs base */ + + /* + * Adjust stack to use uber-space. + */ + mov $(KERNEL_UBER_BASE_HI32), %rax + shl $32, %rsp + shrd $32, %rax, %rsp /* relocate into uber-space */ + + /* Check for return to 64-bit kernel space (EFI today) */ + cmpl $(SS_32), SS_FLAVOR(%rsp) /* 32-bit state? 
*/ + jne L_64bit_return + /* fall through for 32-bit return */ + +L_32bit_return: + /* + * Restore registers into the machine state for iret. + */ + movl R_EIP(%rsp), %eax + movl %eax, ISC32_RIP(%rsp) + movl R_EFLAGS(%rsp), %eax + movl %eax, ISC32_RFLAGS(%rsp) + movl R_CS(%rsp), %eax + movl %eax, ISC32_CS(%rsp) + movl R_UESP(%rsp), %eax + movl %eax, ISC32_RSP(%rsp) + movl R_SS(%rsp), %eax + movl %eax, ISC32_SS(%rsp) + + /* + * Restore general 32-bit registers + */ + movl R_EAX(%rsp), %eax + movl R_EBX(%rsp), %ebx + movl R_ECX(%rsp), %ecx + movl R_EDX(%rsp), %edx + movl R_EBP(%rsp), %ebp + movl R_ESI(%rsp), %esi + movl R_EDI(%rsp), %edi + + /* + * Restore segment registers. We may take an exception here but + * we've got enough space left in the save frame area to absorb + * a hardware frame plus the trapfn and trapno + */ + swapgs +EXT(ret32_set_ds): + movw R_DS(%rsp), %ds +EXT(ret32_set_es): + movw R_ES(%rsp), %es +EXT(ret32_set_fs): + movw R_FS(%rsp), %fs +EXT(ret32_set_gs): + movw R_GS(%rsp), %gs + + add $(ISC32_OFFSET)+8+8, %rsp /* pop compat frame + + trapno/trapfn and error */ + cmp $(SYSENTER_CS),ISF64_CS-8-8(%rsp) + /* test for fast entry/exit */ + je L_fast_exit +EXT(ret32_iret): + iretq /* return from interrupt */ + +L_fast_exit: + pop %rdx /* user return eip */ + pop %rcx /* pop and toss cs */ + andl $(~EFL_IF), (%rsp) /* clear interrupts enable, sti below */ + popf /* flags - carry denotes failure */ + pop %rcx /* user return esp */ + .code32 + sti /* interrupts enabled after sysexit */ + sysexit /* 32-bit sysexit */ + .code64 + +L_64bit_return: + /* + * Set the GS Base MSR with the user's gs base. + */ + movl %gs:CPU_UBER_USER_GS_BASE, %eax + movl %gs:CPU_UBER_USER_GS_BASE+4, %edx + movl $(MSR_IA32_GS_BASE), %ecx + swapgs + testb $3, R64_CS(%rsp) /* returning to user-space? 
*/ + jz 1f + wrmsr /* set 64-bit base */ +1: + + /* + * Restore general 64-bit registers + */ + mov R64_R15(%rsp), %r15 + mov R64_R14(%rsp), %r14 + mov R64_R13(%rsp), %r13 + mov R64_R12(%rsp), %r12 + mov R64_R11(%rsp), %r11 + mov R64_R10(%rsp), %r10 + mov R64_R9(%rsp), %r9 + mov R64_R8(%rsp), %r8 + mov R64_RSI(%rsp), %rsi + mov R64_RDI(%rsp), %rdi + mov R64_RBP(%rsp), %rbp + mov R64_RDX(%rsp), %rdx + mov R64_RBX(%rsp), %rbx + mov R64_RCX(%rsp), %rcx + mov R64_RAX(%rsp), %rax + + add $(ISS64_OFFSET)+8+8, %rsp /* pop saved state frame + + trapno/trapfn and error */ + cmpl $(SYSCALL_CS),ISF64_CS-8-8(%rsp) + /* test for fast entry/exit */ + je L_sysret +EXT(ret64_iret): + iretq /* return from interrupt */ + +L_sysret: + /* + * Here to load rcx/r11/rsp and perform the sysret back to user-space. + * rcx user rip + * r11 user rflags + * rsp user stack pointer + */ + mov ISF64_RIP-16(%rsp), %rcx + mov ISF64_RFLAGS-16(%rsp), %r11 + mov ISF64_RSP-16(%rsp), %rsp + sysretq /* return from system call */ + +/* + * Common path to enter locore handlers. + */ +L_enter_lohandler: + swapgs /* switch to kernel gs (cpu_data) */ +L_enter_lohandler_continue: + cmpl $(USER64_CS), ISF64_CS(%rsp) + je L_64bit_enter /* this is a 64-bit user task */ + cmpl $(KERNEL64_CS), ISF64_CS(%rsp) + je L_64bit_enter /* we're in 64-bit (EFI) code */ + jmp L_32bit_enter + +/* + * System call handlers. + * These are entered via a syscall interrupt. The system call number in %rax + * is saved to the error code slot in the stack frame. We then branch to the + * common state saving code. 
+ */ + +Entry(hi64_unix_scall) + swapgs /* switch to kernel gs (cpu_data) */ +L_unix_scall_continue: + push %rax /* save system call number */ + push $(UNIX_INT) + movl $(LO_UNIX_SCALL), 4(%rsp) + jmp L_32bit_enter_check + + +Entry(hi64_mach_scall) + swapgs /* switch to kernel gs (cpu_data) */ +L_mach_scall_continue: + push %rax /* save system call number */ + push $(MACH_INT) + movl $(LO_MACH_SCALL), 4(%rsp) + jmp L_32bit_enter_check + + +Entry(hi64_mdep_scall) + swapgs /* switch to kernel gs (cpu_data) */ +L_mdep_scall_continue: + push %rax /* save system call number */ + push $(MACHDEP_INT) + movl $(LO_MDEP_SCALL), 4(%rsp) + jmp L_32bit_enter_check + + +Entry(hi64_diag_scall) + swapgs /* switch to kernel gs (cpu_data) */ +L_diag_scall_continue: + push %rax /* save system call number */ + push $(DIAG_INT) + movl $(LO_DIAG_SCALL), 4(%rsp) + jmp L_32bit_enter_check + +Entry(hi64_syscall) + swapgs /* Kapow! get per-cpu data area */ +L_syscall_continue: + mov %rsp, %gs:CPU_UBER_TMP /* save user stack */ + mov %gs:CPU_UBER_ISF, %rsp /* switch stack to pcb */ + + /* + * Save values in the ISF frame in the PCB + * to cons up the saved machine state. + */ + movl $(USER_DS), ISF64_SS(%rsp) + movl $(SYSCALL_CS), ISF64_CS(%rsp) /* cs - a pseudo-segment */ + mov %r11, ISF64_RFLAGS(%rsp) /* rflags */ + mov %rcx, ISF64_RIP(%rsp) /* rip */ + mov %gs:CPU_UBER_TMP, %rcx + mov %rcx, ISF64_RSP(%rsp) /* user stack */ + mov %rax, ISF64_ERR(%rsp) /* err/rax - syscall code */ + movl $(0), ISF64_TRAPNO(%rsp) /* trapno */ + movl $(LO_SYSCALL), ISF64_TRAPFN(%rsp) + jmp L_64bit_enter /* this can only be a 64-bit task */ + +/* + * sysenter entry point + * Requires user code to set up: + * edx: user instruction pointer (return address) + * ecx: user stack pointer + * on which is pushed stub ret addr and saved ebx + * Return to user-space is made using sysexit. + * Note: sysenter/sysexit cannot be used for calls returning a value in edx, + * or requiring ecx to be preserved. 
+ */ +Entry(hi64_sysenter) + mov (%rsp), %rsp /* switch from temporary stack to pcb */ + /* + * Push values on to the PCB stack + * to cons up the saved machine state. + */ + push $(USER_DS) /* ss */ + push %rcx /* uesp */ + pushf /* flags */ + /* + * Clear, among others, the Nested Task (NT) flags bit; + * This is cleared by INT, but not by sysenter, which only + * clears RF, VM and IF. + */ + push $0 + popf + push $(SYSENTER_CS) /* cs */ + swapgs /* switch to kernel gs (cpu_data) */ +L_sysenter_continue: + push %rdx /* eip */ + push %rax /* err/eax - syscall code */ + push $(0) + movl $(LO_SYSENTER), ISF64_TRAPFN(%rsp) + orl $(EFL_IF), ISF64_RFLAGS(%rsp) + +L_32bit_enter_check: + /* + * Check we're not a confused 64-bit user. + */ + cmpl $(TASK_MAP_32BIT), %gs:CPU_TASK_MAP + jne L_64bit_entry_reject + /* fall through to 32-bit handler: */ + +L_32bit_enter: + /* + * Make space for the compatibility save area. + */ + sub $(ISC32_OFFSET), %rsp + movl $(SS_32), SS_FLAVOR(%rsp) + + /* + * Save segment regs + */ + mov %ds, R_DS(%rsp) + mov %es, R_ES(%rsp) + mov %fs, R_FS(%rsp) + mov %gs, R_GS(%rsp) + + /* + * Save general 32-bit registers + */ + mov %eax, R_EAX(%rsp) + mov %ebx, R_EBX(%rsp) + mov %ecx, R_ECX(%rsp) + mov %edx, R_EDX(%rsp) + mov %ebp, R_EBP(%rsp) + mov %esi, R_ESI(%rsp) + mov %edi, R_EDI(%rsp) + + /* Unconditionally save cr2; only meaningful on page faults */ + mov %cr2, %rax + mov %eax, R_CR2(%rsp) + + /* + * Copy registers already saved in the machine state + * (in the interrupt stack frame) into the compat save area. 
+ */ + mov ISC32_RIP(%rsp), %eax + mov %eax, R_EIP(%rsp) + mov ISC32_RFLAGS(%rsp), %eax + mov %eax, R_EFLAGS(%rsp) + mov ISC32_CS(%rsp), %eax + mov %eax, R_CS(%rsp) + mov ISC32_RSP(%rsp), %eax + mov %eax, R_UESP(%rsp) + mov ISC32_SS(%rsp), %eax + mov %eax, R_SS(%rsp) +L_32bit_enter_after_fault: + mov ISC32_TRAPNO(%rsp), %ebx /* %ebx := trapno for later */ + mov %ebx, R_TRAPNO(%rsp) + mov ISC32_ERR(%rsp), %eax + mov %eax, R_ERR(%rsp) + mov ISC32_TRAPFN(%rsp), %edx + +/* + * Common point to enter lo_handler in compatibility mode: + * %ebx trapno + * %edx locore handler address + */ +L_enter_lohandler2: + /* + * Switch address space to kernel + * if not shared space and not already mapped. + * Note: cpu_task_map is valid only if cpu_task_cr3 is loaded in cr3. + */ + mov %cr3, %rax + mov %gs:CPU_TASK_CR3, %rcx + cmp %rax, %rcx /* is the task's cr3 loaded? */ + jne 1f + cmpl $(TASK_MAP_64BIT_SHARED), %gs:CPU_TASK_MAP + je 2f +1: + mov %gs:CPU_KERNEL_CR3, %rcx + cmp %rax, %rcx + je 2f + mov %rcx, %cr3 + mov %rcx, %gs:CPU_ACTIVE_CR3 +2: + /* + * Switch to compatibility mode. + * Then establish kernel segments. + */ + swapgs /* Done with uber-kernel gs */ + ENTER_COMPAT_MODE() + + /* + * Now in compatibility mode and running in compatibility space + * prepare to enter the locore handler. + * %ebx trapno + * %edx lo_handler pointer + * Note: the stack pointer (now 32-bit) is now directly addressing the + * kernel below 4G and therefore is automagically re-based. + */ + mov $(KERNEL_DS), %eax + mov %eax, %ss + mov %eax, %ds + mov %eax, %es + mov %eax, %fs + mov $(CPU_DATA_GS), %eax + mov %eax, %gs + + movl %gs:CPU_ACTIVE_THREAD,%ecx /* Get the active thread */ + cmpl $0, ACT_PCB_IDS(%ecx) /* Is there a debug register state? 
*/ + je 1f + movl $0, %ecx /* If so, reset DR7 (the control) */ + movl %ecx, %dr7 +1: + addl $1,%gs:hwIntCnt(,%ebx,4) // Bump the trap/intr count + + /* Dispatch the designated lo handler */ + jmp *%edx + + .code64 +L_64bit_entry_reject: + /* + * Here for a 64-bit user attempting an invalid kernel entry. + */ + movl $(LO_ALLTRAPS), ISF64_TRAPFN(%rsp) + movl $(T_INVALID_OPCODE), ISF64_TRAPNO(%rsp) + /* Fall through... */ + +L_64bit_enter: + /* + * Here for a 64-bit user task, or special 64-bit kernel code. + * Make space for the save area. + */ + sub $(ISS64_OFFSET), %rsp + movl $(SS_64), SS_FLAVOR(%rsp) + + /* + * Save segment regs + */ + mov %fs, R64_FS(%rsp) + mov %gs, R64_GS(%rsp) + + /* Save general-purpose registers */ + mov %rax, R64_RAX(%rsp) + mov %rcx, R64_RCX(%rsp) + mov %rbx, R64_RBX(%rsp) + mov %rbp, R64_RBP(%rsp) + mov %r11, R64_R11(%rsp) + mov %r12, R64_R12(%rsp) + mov %r13, R64_R13(%rsp) + mov %r14, R64_R14(%rsp) + mov %r15, R64_R15(%rsp) + + /* cr2 is significant only for page-faults */ + mov %cr2, %rax + mov %rax, R64_CR2(%rsp) + + /* Other registers (which may contain syscall args) */ + mov %rdi, R64_RDI(%rsp) /* arg0 .. */ + mov %rsi, R64_RSI(%rsp) + mov %rdx, R64_RDX(%rsp) + mov %r10, R64_R10(%rsp) + mov %r8, R64_R8(%rsp) + mov %r9, R64_R9(%rsp) /* .. arg5 */ + +L_64bit_enter_after_fault: + /* + * At this point we're almost ready to join the common lo-entry code. + */ + mov R64_TRAPNO(%rsp), %ebx + mov R64_TRAPFN(%rsp), %edx + + jmp L_enter_lohandler2 + +/* + * Debug trap. Check for single-stepping across system call into + * kernel. If this is the case, taking the debug trap has turned + * off single-stepping - save the flags register with the trace + * bit set. 
+ */ +Entry(hi64_debug) + swapgs /* set %gs for cpu data */ + push $0 /* error code */ + push $(T_DEBUG) + movl $(LO_ALLTRAPS), ISF64_TRAPFN(%rsp) + + testb $3, ISF64_CS(%rsp) + jnz L_enter_lohandler_continue + + /* + * trap came from kernel mode + */ + cmpl $(KERNEL_UBER_BASE_HI32), ISF64_RIP+4(%rsp) + jne L_enter_lohandler_continue /* trap not in uber-space */ + + cmpl $(EXT(hi64_mach_scall)), ISF64_RIP(%rsp) + jne 6f + add $(ISF64_SIZE),%rsp /* remove entire intr stack frame */ + jmp L_mach_scall_continue /* continue system call entry */ +6: + cmpl $(EXT(hi64_mdep_scall)), ISF64_RIP(%rsp) + jne 5f + add $(ISF64_SIZE),%rsp /* remove entire intr stack frame */ + jmp L_mdep_scall_continue /* continue system call entry */ +5: + cmpl $(EXT(hi64_unix_scall)), ISF64_RIP(%rsp) + jne 4f + add $(ISF64_SIZE),%rsp /* remove entire intr stack frame */ + jmp L_unix_scall_continue /* continue system call entry */ +4: + cmpl $(EXT(hi64_sysenter)), ISF64_RIP(%rsp) + jne L_enter_lohandler_continue + /* + * Interrupt stack frame has been pushed on the temporary stack. + * We have to switch to pcb stack and copy eflags. 
+ */ + add $32,%rsp /* remove trapno/trapfn/err/rip/cs */ + push %rcx /* save %rcx - user stack pointer */ + mov 32(%rsp),%rcx /* top of intr stack -> pcb stack */ + xchg %rcx,%rsp /* switch to pcb stack */ + push $(USER_DS) /* ss */ + push (%rcx) /* saved %rcx into rsp slot */ + push 8(%rcx) /* rflags */ + mov (%rcx),%rcx /* restore %rcx */ + push $(SYSENTER_TF_CS) /* cs - not SYSENTER_CS for iret path */ + jmp L_sysenter_continue /* continue sysenter entry */ + + +Entry(hi64_double_fault) + swapgs /* set %gs for cpu data */ + push $(T_DOUBLE_FAULT) + movl $(LO_DOUBLE_FAULT), ISF64_TRAPFN(%rsp) + + cmpl $(KERNEL_UBER_BASE_HI32), ISF64_RIP+4(%rsp) + jne L_enter_lohandler_continue /* trap not in uber-space */ + + cmpl $(EXT(hi64_syscall)), ISF64_RIP(%rsp) + jne L_enter_lohandler_continue + + mov ISF64_RSP(%rsp), %rsp + jmp L_syscall_continue + + +/* + * General protection or segment-not-present fault. + * Check for a GP/NP fault in the kernel_return + * sequence; if there, report it as a GP/NP fault on the user's instruction. 
+ * + * rsp-> 0: trap code (NP or GP) and trap function + * 8: segment number in error (error code) + * 16 rip + * 24 cs + * 32 rflags + * 40 rsp + * 48 ss + * 56 old registers (trap is from kernel) + */ +Entry(hi64_gen_prot) + push $(T_GENERAL_PROTECTION) + jmp trap_check_kernel_exit /* check for kernel exit sequence */ + +Entry(hi64_segnp) + push $(T_SEGMENT_NOT_PRESENT) + /* indicate fault type */ +trap_check_kernel_exit: + movl $(LO_ALLTRAPS), 4(%rsp) + testb $3,24(%rsp) + jnz hi64_take_trap + /* trap was from kernel mode, so */ + /* check for the kernel exit sequence */ + cmpl $(KERNEL_UBER_BASE_HI32), 16+4(%rsp) + jne hi64_take_trap /* trap not in uber-space */ + + cmpl $(EXT(ret32_iret)), 16(%rsp) + je L_fault_iret + cmpl $(EXT(ret32_set_ds)), 16(%rsp) + je L_32bit_fault_set_seg + cmpl $(EXT(ret32_set_es)), 16(%rsp) + je L_32bit_fault_set_seg + cmpl $(EXT(ret32_set_fs)), 16(%rsp) + je L_32bit_fault_set_seg + cmpl $(EXT(ret32_set_gs)), 16(%rsp) + je L_32bit_fault_set_seg + + cmpl $(EXT(ret64_iret)), 16(%rsp) + je L_fault_iret + +hi64_take_trap: + jmp L_enter_lohandler + + +/* + * GP/NP fault on IRET: CS or SS is in error. + * All registers contain the user's values. + * + * on SP is + * 0 trap number/function + * 8 errcode + * 16 rip + * 24 cs + * 32 rflags + * 40 rsp --> new trapno + * 48 ss --> new errcode + * 56 user rip + * 64 user cs + * 72 user rflags + * 80 user rsp + * 88 user ss + */ +L_fault_iret: + mov %rax, 16(%rsp) /* save rax (we don`t need saved rip) */ + pop %rax /* get trap number */ + mov %rax, 40-8(%rsp) /* put in user trap number */ + pop %rax /* get error code */ + mov %rax, 48-8-8(%rsp) /* put in user errcode */ + pop %rax /* restore rax */ + add $16,%rsp /* eat 2 more slots */ + /* now treat as fault from user */ + jmp L_enter_lohandler + +/* + * Fault restoring a segment register. All of the saved state is still + * on the stack untouched since we haven't yet moved the stack pointer. 
+ */ +L_32bit_fault_set_seg: + pop %rax /* get trap number/function */ + pop %rdx /* get error code */ + add $40,%rsp /* pop stack to saved state */ + mov %rax,ISC32_TRAPNO(%rsp) + mov %rdx,ISC32_ERR(%rsp) + /* now treat as fault from user */ + /* except that all the state is */ + /* already saved - we just have to */ + /* move the trapno and error into */ + /* the compatibility frame */ + swapgs + jmp L_32bit_enter_after_fault + + +/* + * Fatal exception handlers: + */ +Entry(db_task_dbl_fault64) + push $(T_DOUBLE_FAULT) + movl $(LO_DOUBLE_FAULT), ISF64_TRAPFN(%rsp) + jmp L_enter_lohandler + +Entry(db_task_stk_fault64) +Entry(hi64_stack_fault) + push $(T_STACK_FAULT) + movl $(LO_DOUBLE_FAULT), ISF64_TRAPFN(%rsp) + jmp L_enter_lohandler + +Entry(mc64) + push $(0) /* Error */ + push $(T_MACHINE_CHECK) + movl $(LO_MACHINE_CHECK), ISF64_TRAPFN(%rsp) + jmp L_enter_lohandler diff --git a/osfmk/i386/io_emulate.c b/osfmk/i386/io_emulate.c index f9ed656d5..a84c041bc 100644 --- a/osfmk/i386/io_emulate.c +++ b/osfmk/i386/io_emulate.c @@ -70,7 +70,7 @@ #if 1 int emulate_io( - __unused struct i386_saved_state *regs, + __unused x86_saved_state32_t *regs, __unused int opcode, __unused int io_port) { diff --git a/osfmk/i386/io_emulate.h b/osfmk/i386/io_emulate.h index 239fd081c..2eb6ce595 100644 --- a/osfmk/i386/io_emulate.h +++ b/osfmk/i386/io_emulate.h @@ -58,7 +58,7 @@ * Return codes from IO emulation. */ extern int emulate_io( - struct i386_saved_state *regs, + x86_saved_state32_t *regs, int opcode, int io_port); diff --git a/osfmk/i386/io_map.c b/osfmk/i386/io_map.c index 2a9ae6ddf..988b0f8cd 100644 --- a/osfmk/i386/io_map.c +++ b/osfmk/i386/io_map.c @@ -63,9 +63,7 @@ extern vm_offset_t virtual_avail; * Mach VM is running. 
*/ vm_offset_t -io_map(phys_addr, size) - vm_offset_t phys_addr; - vm_size_t size; +io_map(vm_offset_t phys_addr, vm_size_t size, unsigned int flags) { vm_offset_t start; @@ -77,19 +75,21 @@ io_map(phys_addr, size) virtual_avail += round_page(size); (void) pmap_map_bd(start, phys_addr, phys_addr + round_page(size), - VM_PROT_READ|VM_PROT_WRITE); + VM_PROT_READ|VM_PROT_WRITE, + flags); } else { (void) kmem_alloc_pageable(kernel_map, &start, round_page(size)); (void) pmap_map(start, phys_addr, phys_addr + round_page(size), - VM_PROT_READ|VM_PROT_WRITE); + VM_PROT_READ|VM_PROT_WRITE, + flags); } return (start); } /* just wrap this since io_map handles it */ -vm_offset_t io_map_spec(vm_offset_t phys_addr, vm_size_t size) +vm_offset_t io_map_spec(vm_offset_t phys_addr, vm_size_t size, unsigned int flags) { - return (io_map(phys_addr, size)); + return (io_map(phys_addr, size, flags)); } diff --git a/osfmk/i386/io_map_entries.h b/osfmk/i386/io_map_entries.h index ebde6d42a..338cb6954 100644 --- a/osfmk/i386/io_map_entries.h +++ b/osfmk/i386/io_map_entries.h @@ -34,8 +34,9 @@ __BEGIN_DECLS extern vm_offset_t io_map( vm_offset_t phys_addr, - vm_size_t size); -extern vm_offset_t io_map_spec(vm_offset_t phys_addr, vm_size_t size); + vm_size_t size, + unsigned int flags); +extern vm_offset_t io_map_spec(vm_offset_t phys_addr, vm_size_t size, unsigned int flags); __END_DECLS #endif /* __APPLE_API_PRIVATE */ diff --git a/osfmk/i386/iopb.c b/osfmk/i386/iopb.c index ecf1a8c68..6ae4acb7a 100644 --- a/osfmk/i386/iopb.c +++ b/osfmk/i386/iopb.c @@ -394,7 +394,7 @@ i386_io_port_add( /* Make sure the thread has a TSS. 
*/ simple_lock(&pcb->lock); - io_tss = pcb->ims.io_tss; + io_tss = pcb->io_tss; if (io_tss == 0) { if (new_io_tss == 0) { /* @@ -409,7 +409,7 @@ i386_io_port_add( goto Retry; } io_tss = new_io_tss; - pcb->ims.io_tss = io_tss; + pcb->io_tss = io_tss; new_io_tss = 0; } @@ -483,7 +483,7 @@ i386_io_port_remove( } simple_lock(&pcb->lock); - io_tss = pcb->ims.io_tss; + io_tss = pcb->io_tss; if (io_tss == 0) { simple_unlock(&pcb->lock); simple_unlock(&iopb_lock); @@ -563,7 +563,7 @@ i386_io_port_list(thread, list, list_count) simple_lock(&iopb_lock); simple_lock(&pcb->lock); - io_tss = pcb->ims.io_tss; + io_tss = pcb->io_tss; if (io_tss != 0) { register io_use_t iu; @@ -648,7 +648,7 @@ iopb_check_mapping( /* Look up the mapping in the device`s mapping list. */ queue_iterate(&io_port->io_use_list, iu, io_use_t, psq) { - if (iu->ts == pcb->ims.io_tss) { + if (iu->ts == pcb->io_tss) { /* * Device is mapped. */ diff --git a/osfmk/i386/ktss.c b/osfmk/i386/ktss.c index 23a39df8f..24d031229 100644 --- a/osfmk/i386/ktss.c +++ b/osfmk/i386/ktss.c @@ -56,13 +56,14 @@ * We don't use the i386 task switch mechanism. We need a TSS * only to hold the kernel stack pointer for the current thread. * - * XXX multiprocessor?? */ #include #include #include -struct i386_tss ktss = { +struct i386_tss master_ktss + __attribute__ ((section ("__DESC, master_ktss"))) + __attribute__ ((aligned (4096))) = { 0, /* back link */ 0, /* esp0 */ KERNEL_DS, /* ss0 */ @@ -94,9 +95,104 @@ struct i386_tss ktss = { so no bitmap */ }; +/* + * The transient stack for sysenter. + * At its top is a 32-bit link to the PCB in legacy mode, 64-bit otherwise. + * NB: it also must be large enough to contain an interrupt stack frame + * due to a single-step trace trap at system call entry.
+ */ +struct sysenter_stack master_sstk + __attribute__ ((section ("__DESC, master_sstk"))) + __attribute__ ((aligned (16))) = { {0}, 0 }; + +#ifdef X86_64 +struct x86_64_tss master_ktss64 __attribute__ ((aligned (4096))) = { + .io_bit_map_offset = 0x0FFF, +}; +#endif /* X86_64 */ + + + +/* + * Task structure for double-fault handler: + */ +struct i386_tss master_dftss + __attribute__ ((section ("__DESC, master_dftss"))) + __attribute__ ((aligned (4096))) = { + 0, /* back link */ + (int) &df_task_stack_end - 4, /* esp0 */ + KERNEL_DS, /* ss0 */ + 0, /* esp1 */ + 0, /* ss1 */ + 0, /* esp2 */ + 0, /* ss2 */ + 0, /* cr3 */ + (int) &df_task_start, /* eip */ + 0, /* eflags */ + 0, /* eax */ + 0, /* ecx */ + 0, /* edx */ + 0, /* ebx */ + (int) &df_task_stack_end - 4, /* esp */ + 0, /* ebp */ + 0, /* esi */ + 0, /* edi */ + KERNEL_DS, /* es */ + KERNEL_CS, /* cs */ + KERNEL_DS, /* ss */ + KERNEL_DS, /* ds */ + KERNEL_DS, /* fs */ + CPU_DATA_GS, /* gs */ + KERNEL_LDT, /* ldt */ + 0, /* trace_trap */ + 0x0FFF /* IO bitmap offset - + beyond end of TSS segment, + so no bitmap */ +}; + + +/* + * Task structure for machine_check handler: + */ +struct i386_tss master_mctss + __attribute__ ((section ("__DESC, master_mctss"))) + __attribute__ ((aligned (4096))) = { + 0, /* back link */ + (int) &mc_task_stack_end - 4, /* esp0 */ + KERNEL_DS, /* ss0 */ + 0, /* esp1 */ + 0, /* ss1 */ + 0, /* esp2 */ + 0, /* ss2 */ + 0, /* cr3 */ + (int) &mc_task_start, /* eip */ + 0, /* eflags */ + 0, /* eax */ + 0, /* ecx */ + 0, /* edx */ + 0, /* ebx */ + (int) &mc_task_stack_end - 4, /* esp */ + 0, /* ebp */ + 0, /* esi */ + 0, /* edi */ + KERNEL_DS, /* es */ + KERNEL_CS, /* cs */ + KERNEL_DS, /* ss */ + KERNEL_DS, /* ds */ + KERNEL_DS, /* fs */ + CPU_DATA_GS, /* gs */ + KERNEL_LDT, /* ldt */ + 0, /* trace_trap */ + 0x0FFF /* IO bitmap offset - + beyond end of TSS segment, + so no bitmap */ +}; + #if MACH_KDB -struct i386_tss dbtss = { +struct i386_tss master_dbtss + __attribute__ ((section 
("__DESC, master_dbtss"))) + __attribute__ ((aligned (4096))) = { 0, /* back link */ 0, /* esp0 */ KERNEL_DS, /* ss0 */ diff --git a/osfmk/i386/ldt.c b/osfmk/i386/ldt.c index 6c5f9840a..2ad1ec04b 100644 --- a/osfmk/i386/ldt.c +++ b/osfmk/i386/ldt.c @@ -59,46 +59,42 @@ #include #include -extern int syscall(void); -extern int mach_rpc(void); - -struct fake_descriptor ldt[LDTSZ] = { -/*007*/ { (unsigned int)&syscall, - KERNEL_CS, - 0, /* no parameters */ - ACC_P|ACC_PL_U|ACC_CALL_GATE - }, /* call gate for system calls */ -/*00F*/ { (unsigned int)&mach_rpc, - KERNEL_CS, - 0, /* no parameters */ - ACC_P|ACC_PL_U|ACC_CALL_GATE - }, /* call gate for mach rpc */ -/*017*/ { 0, - 0xfffff, - SZ_32|SZ_G, - ACC_P|ACC_PL_U|ACC_CODE_R - }, /* user code segment */ -/*01F*/ { 0, - 0xfffff, - SZ_32|SZ_G, - ACC_P|ACC_PL_U|ACC_DATA_W - }, /* user data segment */ -/*027*/ { 0, - 0xfffff, - SZ_32|SZ_G, - ACC_P|ACC_PL_U|ACC_DATA_W - }, /* user cthread segment */ -// Storage space for user ldt entries we will make room for 10 entries initially -// as we will probably never need many more than that -/*02F*/ { 0, 0, 0, 0}, -/*037*/ { 0, 0, 0, 0}, -/*03F*/ { 0, 0, 0, 0}, -/*047*/ { 0, 0, 0, 0}, -/*04F*/ { 0, 0, 0, 0}, -/*057*/ { 0, 0, 0, 0}, -/*05F*/ { 0, 0, 0, 0}, -/*067*/ { 0, 0, 0, 0}, -/*06F*/ { 0, 0, 0, 0}, -/*077*/ { 0, 0, 0, 0}, +struct fake_descriptor master_ldt[LDTSZ] __attribute__ ((aligned (4096))) = { + [SEL_TO_INDEX(SYSENTER_CS)] { /* kernel code (sysenter) */ + 0, + 0xfffff, + SZ_32|SZ_G, + ACC_P|ACC_PL_K|ACC_CODE_R + }, + [SEL_TO_INDEX(SYSENTER_DS)] { /* kernel data (sysenter) */ + 0, + 0xfffff, + SZ_32|SZ_G, + ACC_P|ACC_PL_K|ACC_DATA_W + }, + [SEL_TO_INDEX(USER_CS)] { /* user code segment */ + 0, + 0xfffff, + SZ_32|SZ_G, + ACC_P|ACC_PL_U|ACC_CODE_R + }, + [SEL_TO_INDEX(USER_DS)] { /* user data segment */ + 0, + 0xfffff, + SZ_32|SZ_G, + ACC_P|ACC_PL_U|ACC_DATA_W + }, + [SEL_TO_INDEX(USER64_CS)] { /* user 64-bit code segment */ + 0, + 0xfffff, + SZ_64|SZ_G, + 
ACC_P|ACC_PL_U|ACC_CODE_R + }, + [SEL_TO_INDEX(USER_CTHREAD)] { /* user cthread segment */ + 0, + 0xfffff, + SZ_32|SZ_G, + ACC_P|ACC_PL_U|ACC_DATA_W + }, }; diff --git a/osfmk/i386/lock.h b/osfmk/i386/lock.h index 8f31c69bb..1f11129e7 100644 --- a/osfmk/i386/lock.h +++ b/osfmk/i386/lock.h @@ -87,15 +87,7 @@ typedef struct { #endif /* MACH_LDEBUG */ } mutex_t; -typedef struct { - decl_simple_lock_data(,interlock) /* "hardware" interlock field */ - volatile unsigned int - read_count:16, /* No. of accepted readers */ - want_upgrade:1, /* Read-to-write upgrade waiting */ - want_write:1, /* Writer is waiting, or locked for write */ - waiting:1, /* Someone is sleeping on lock */ - can_sleep:1; /* Can attempts to lock go to sleep? */ -} lock_t; +typedef lck_rw_t lock_t; extern unsigned int LockTimeOut; /* Number of hardware ticks of a lock timeout */ @@ -152,21 +144,21 @@ static inline unsigned long i_bit_isset(unsigned int test, volatile unsigned lon static inline char xchgb(volatile char * cp, char new); -static inline void atomic_incl(long * p, long delta); -static inline void atomic_incs(short * p, short delta); -static inline void atomic_incb(char * p, char delta); +static inline void atomic_incl(volatile long * p, long delta); +static inline void atomic_incs(volatile short * p, short delta); +static inline void atomic_incb(volatile char * p, char delta); -static inline void atomic_decl(long * p, long delta); -static inline void atomic_decs(short * p, short delta); -static inline void atomic_decb(char * p, char delta); +static inline void atomic_decl(volatile long * p, long delta); +static inline void atomic_decs(volatile short * p, short delta); +static inline void atomic_decb(volatile char * p, char delta); -static inline long atomic_getl(long * p); -static inline short atomic_gets(short * p); -static inline char atomic_getb(char * p); +static inline long atomic_getl(const volatile long * p); +static inline short atomic_gets(const volatile short * p); +static 
inline char atomic_getb(const volatile char * p); -static inline void atomic_setl(long * p, long value); -static inline void atomic_sets(short * p, short value); -static inline void atomic_setb(char * p, char value); +static inline void atomic_setl(volatile long * p, long value); +static inline void atomic_sets(volatile short * p, short value); +static inline void atomic_setb(volatile char * p, char value); static inline char xchgb(volatile char * cp, char new) { @@ -199,35 +191,7 @@ atomic_cmpxchg(uint32_t *p, uint32_t old, uint32_t new) return (res); } -static inline uint64_t -atomic_load64(uint64_t *quadp) -{ - uint64_t ret; - - asm volatile( - " lock; cmpxchg8b %1" - : "=A" (ret) - : "m" (*quadp), "a" (0), "d" (0), "b" (0), "c" (0)); - return (ret); -} - -static inline uint64_t -atomic_loadstore64(uint64_t *quadp, uint64_t new) -{ - uint64_t ret; - - ret = *quadp; - asm volatile( - "1: \n\t" - " lock; cmpxchg8b %1 \n\t" - " jnz 1b" - : "+A" (ret) - : "m" (*quadp), - "b" ((uint32_t)new), "c" ((uint32_t)(new >> 32))); - return (ret); -} - -static inline void atomic_incl(long * p, long delta) +static inline void atomic_incl(volatile long * p, long delta) { __asm__ volatile (" lock \n \ addl %0,%1" : \ @@ -235,7 +199,7 @@ static inline void atomic_incl(long * p, long delta) "r" (delta), "m" (*(volatile long *)p)); } -static inline void atomic_incs(short * p, short delta) +static inline void atomic_incs(volatile short * p, short delta) { __asm__ volatile (" lock \n \ addw %0,%1" : \ @@ -243,7 +207,7 @@ static inline void atomic_incs(short * p, short delta) "q" (delta), "m" (*(volatile short *)p)); } -static inline void atomic_incb(char * p, char delta) +static inline void atomic_incb(volatile char * p, char delta) { __asm__ volatile (" lock \n \ addb %0,%1" : \ @@ -251,7 +215,7 @@ static inline void atomic_incb(char * p, char delta) "q" (delta), "m" (*(volatile char *)p)); } -static inline void atomic_decl(long * p, long delta) +static inline void 
atomic_decl(volatile long * p, long delta) { __asm__ volatile (" lock \n \ subl %0,%1" : \ @@ -259,7 +223,7 @@ static inline void atomic_decl(long * p, long delta) "r" (delta), "m" (*(volatile long *)p)); } -static inline int atomic_decl_and_test(long * p, long delta) +static inline int atomic_decl_and_test(volatile long * p, long delta) { uint8_t ret; asm volatile ( @@ -271,7 +235,7 @@ static inline int atomic_decl_and_test(long * p, long delta) return ret; } -static inline void atomic_decs(short * p, short delta) +static inline void atomic_decs(volatile short * p, short delta) { __asm__ volatile (" lock \n \ subw %0,%1" : \ @@ -279,7 +243,7 @@ static inline void atomic_decs(short * p, short delta) "q" (delta), "m" (*(volatile short *)p)); } -static inline void atomic_decb(char * p, char delta) +static inline void atomic_decb(volatile char * p, char delta) { __asm__ volatile (" lock \n \ subb %0,%1" : \ @@ -287,32 +251,32 @@ static inline void atomic_decb(char * p, char delta) "q" (delta), "m" (*(volatile char *)p)); } -static inline long atomic_getl(long * p) +static inline long atomic_getl(const volatile long * p) { return (*p); } -static inline short atomic_gets(short * p) +static inline short atomic_gets(const volatile short * p) { return (*p); } -static inline char atomic_getb(char * p) +static inline char atomic_getb(const volatile char * p) { return (*p); } -static inline void atomic_setl(long * p, long value) +static inline void atomic_setl(volatile long * p, long value) { *p = value; } -static inline void atomic_sets(short * p, short value) +static inline void atomic_sets(volatile short * p, short value) { *p = value; } -static inline void atomic_setb(char * p, char value) +static inline void atomic_setb(volatile char * p, char value) { *p = value; } diff --git a/osfmk/i386/locks.h b/osfmk/i386/locks.h index 40e3340e3..c262da07c 100644 --- a/osfmk/i386/locks.h +++ b/osfmk/i386/locks.h @@ -82,6 +82,12 @@ typedef struct _lck_mtx_ { #define 
LCK_MTX_TAG_INDIRECT 0x00001007 /* lock marked as Indirect */ #define LCK_MTX_TAG_DESTROYED 0x00002007 /* lock marked as Destroyed */ +/* Adaptive spin before blocking */ +extern unsigned int MutexSpin; +extern void lck_mtx_lock_spin(lck_mtx_t *lck); + +extern void lck_mtx_interlock_spin(lck_mtx_t *lck); + typedef struct { unsigned int type; vm_offset_t pc; @@ -103,9 +109,9 @@ typedef struct _lck_mtx_ext_ { } lck_mtx_ext_t; #define LCK_MTX_ATTR_DEBUG 0x1 -#define LCK_MTX_ATTR_DEBUGb 31 +#define LCK_MTX_ATTR_DEBUGb 0 #define LCK_MTX_ATTR_STAT 0x2 -#define LCK_MTX_ATTR_STATb 30 +#define LCK_MTX_ATTR_STATb 1 #else #ifdef KERNEL_PRIVATE @@ -123,12 +129,24 @@ typedef struct { volatile unsigned int read_count:16, /* No. of accepted readers */ want_upgrade:1, /* Read-to-write upgrade waiting */ - want_write:1, /* Writer is waiting, or locked for write */ + want_write:1, /* Writer waiting or locked for write */ waiting:1, /* Someone is sleeping on lock */ - can_sleep:1; /* Can attempts to lock go to sleep? */ + can_sleep:1, /* Can attempts to lock go to sleep? 
*/ + read_priority:1;/* New read takes priority over write */ unsigned int lck_rw_tag; } lck_rw_t; +#define LCK_RW_ATTR_DEBUG 0x1 +#define LCK_RW_ATTR_DEBUGb 0 +#define LCK_RW_ATTR_STAT 0x2 +#define LCK_RW_ATTR_STATb 1 +#define LCK_RW_ATTR_READ_PRI 0x3 +#define LCK_RW_ATTR_READ_PRIb 2 +#define LCK_RW_ATTR_DIS_THREAD 0x40000000 +#define LCK_RW_ATTR_DIS_THREADb 30 +#define LCK_RW_ATTR_DIS_MYLOCK 0x10000000 +#define LCK_RW_ATTR_DIS_MYLOCKb 28 + #define LCK_RW_TAG_DESTROYED 0x00002007 /* lock marked as Destroyed */ #else diff --git a/osfmk/i386/locks_i386.c b/osfmk/i386/locks_i386.c index 3e9b62fd7..be8e5e45b 100644 --- a/osfmk/i386/locks_i386.c +++ b/osfmk/i386/locks_i386.c @@ -78,9 +78,7 @@ #include #endif /* MACH_KDB */ -#ifdef __ppc__ -#include -#endif +#include #include @@ -91,6 +89,7 @@ #define LCK_RW_LCK_SH_TO_EX1_CODE 0x104 #define LCK_RW_LCK_EX_TO_SH_CODE 0x105 +#define LCK_MTX_LCK_SPIN 0x200 #define ANY_LOCK_DEBUG (USLOCK_DEBUG || LOCK_DEBUG || MUTEX_DEBUG) @@ -249,7 +248,7 @@ boolean_t lck_spin_try_lock( lck_spin_t *lck) { - usimple_lock_try((usimple_lock_t) lck); + return(usimple_lock_try((usimple_lock_t) lck)); } /* @@ -665,15 +664,14 @@ lock_init( lock_t *l, boolean_t can_sleep, __unused unsigned short tag, - unsigned short tag1) + __unused unsigned short tag1) { - (void) memset((void *) l, 0, sizeof(lock_t)); - - simple_lock_init(&l->interlock, tag1); + hw_lock_init(&l->interlock); l->want_write = FALSE; l->want_upgrade = FALSE; l->read_count = 0; l->can_sleep = can_sleep; + l->lck_rw_tag = tag; } @@ -689,162 +687,21 @@ void lock_write( register lock_t * l) { - register int i; - boolean_t lock_miss = FALSE; -#if MACH_LDEBUG - int decrementer; -#endif /* MACH_LDEBUG */ - - simple_lock(&l->interlock); - -#if MACH_LDEBUG - decrementer = DECREMENTER_TIMEOUT; -#endif /* MACH_LDEBUG */ - - /* - * Try to acquire the want_write bit. - */ - while (l->want_write) { - if (!lock_miss) { - lock_miss = TRUE; - } - - i = lock_wait_time[l->can_sleep ?
1 : 0]; - if (i != 0) { - simple_unlock(&l->interlock); -#if MACH_LDEBUG - if (!--decrementer) - Debugger("timeout - want_write"); -#endif /* MACH_LDEBUG */ - while (--i != 0 && l->want_write) - continue; - simple_lock(&l->interlock); - } - - if (l->can_sleep && l->want_write) { - l->waiting = TRUE; - thread_sleep_simple_lock((event_t) l, - simple_lock_addr(l->interlock), - THREAD_UNINT); - /* interlock relocked */ - } - } - l->want_write = TRUE; - - /* Wait for readers (and upgrades) to finish */ - -#if MACH_LDEBUG - decrementer = DECREMENTER_TIMEOUT; -#endif /* MACH_LDEBUG */ - while ((l->read_count != 0) || l->want_upgrade) { - if (!lock_miss) { - lock_miss = TRUE; - } - - i = lock_wait_time[l->can_sleep ? 1 : 0]; - if (i != 0) { - simple_unlock(&l->interlock); -#if MACH_LDEBUG - if (!--decrementer) - Debugger("timeout - wait for readers"); -#endif /* MACH_LDEBUG */ - while (--i != 0 && (l->read_count != 0 || - l->want_upgrade)) - continue; - simple_lock(&l->interlock); - } - - if (l->can_sleep && (l->read_count != 0 || l->want_upgrade)) { - l->waiting = TRUE; - thread_sleep_simple_lock((event_t) l, - simple_lock_addr(l->interlock), - THREAD_UNINT); - /* interlock relocked */ - } - } - - simple_unlock(&l->interlock); + lck_rw_lock_exclusive(l); } void lock_done( register lock_t * l) { - boolean_t do_wakeup = FALSE; - - - simple_lock(&l->interlock); - - if (l->read_count != 0) { - l->read_count--; - } - else - if (l->want_upgrade) { - l->want_upgrade = FALSE; - } - else { - l->want_write = FALSE; - } - - /* - * There is no reason to wakeup a waiting thread - * if the read-count is non-zero. 
Consider: - * we must be dropping a read lock - * threads are waiting only if one wants a write lock - * if there are still readers, they can't proceed - */ - - if (l->waiting && (l->read_count == 0)) { - l->waiting = FALSE; - do_wakeup = TRUE; - } - - simple_unlock(&l->interlock); - - if (do_wakeup) - thread_wakeup((event_t) l); + (void) lck_rw_done(l); } void lock_read( register lock_t * l) { - register int i; -#if MACH_LDEBUG - int decrementer; -#endif /* MACH_LDEBUG */ - - simple_lock(&l->interlock); - -#if MACH_LDEBUG - decrementer = DECREMENTER_TIMEOUT; -#endif /* MACH_LDEBUG */ - while (l->want_write || l->want_upgrade) { - i = lock_wait_time[l->can_sleep ? 1 : 0]; - - if (i != 0) { - simple_unlock(&l->interlock); -#if MACH_LDEBUG - if (!--decrementer) - Debugger("timeout - wait no writers"); -#endif /* MACH_LDEBUG */ - while (--i != 0 && (l->want_write || l->want_upgrade)) - continue; - simple_lock(&l->interlock); - } - - if (l->can_sleep && (l->want_write || l->want_upgrade)) { - l->waiting = TRUE; - thread_sleep_simple_lock((event_t) l, - simple_lock_addr(l->interlock), - THREAD_UNINT); - /* interlock relocked */ - } - } - - l->read_count++; - - simple_unlock(&l->interlock); + lck_rw_lock_shared(l); } @@ -863,157 +720,16 @@ boolean_t lock_read_to_write( register lock_t * l) { - register int i; - boolean_t do_wakeup = FALSE; -#if MACH_LDEBUG - int decrementer; -#endif /* MACH_LDEBUG */ - - simple_lock(&l->interlock); - - l->read_count--; - - if (l->want_upgrade) { - /* - * Someone else has requested upgrade. - * Since we've released a read lock, wake - * him up. - */ - if (l->waiting && (l->read_count == 0)) { - l->waiting = FALSE; - do_wakeup = TRUE; - } - - simple_unlock(&l->interlock); - - if (do_wakeup) - thread_wakeup((event_t) l); - return (TRUE); - } - - l->want_upgrade = TRUE; - -#if MACH_LDEBUG - decrementer = DECREMENTER_TIMEOUT; -#endif /* MACH_LDEBUG */ - while (l->read_count != 0) { - i = lock_wait_time[l->can_sleep ? 
1 : 0]; - - if (i != 0) { - simple_unlock(&l->interlock); -#if MACH_LDEBUG - if (!--decrementer) - Debugger("timeout - read_count"); -#endif /* MACH_LDEBUG */ - while (--i != 0 && l->read_count != 0) - continue; - simple_lock(&l->interlock); - } - - if (l->can_sleep && l->read_count != 0) { - l->waiting = TRUE; - thread_sleep_simple_lock((event_t) l, - simple_lock_addr(l->interlock), - THREAD_UNINT); - /* interlock relocked */ - } - } - - simple_unlock(&l->interlock); - - return (FALSE); + return lck_rw_lock_shared_to_exclusive(l); } void lock_write_to_read( register lock_t * l) { - boolean_t do_wakeup = FALSE; - - simple_lock(&l->interlock); - - l->read_count++; - if (l->want_upgrade) - l->want_upgrade = FALSE; - else - l->want_write = FALSE; - - if (l->waiting) { - l->waiting = FALSE; - do_wakeup = TRUE; - } - - simple_unlock(&l->interlock); - - if (do_wakeup) - thread_wakeup((event_t) l); -} - - -#if 0 /* Unused */ -/* - * Routine: lock_try_write - * Function: - * Tries to get a write lock. - * - * Returns FALSE if the lock is not held on return. - */ - -boolean_t -lock_try_write( - register lock_t * l) -{ - pc_t pc; - - simple_lock(&l->interlock); - - if (l->want_write || l->want_upgrade || l->read_count) { - /* - * Can't get lock. - */ - simple_unlock(&l->interlock); - return(FALSE); - } - - /* - * Have lock. - */ - - l->want_write = TRUE; - - simple_unlock(&l->interlock); - - return(TRUE); + lck_rw_lock_exclusive_to_shared(l); } -/* - * Routine: lock_try_read - * Function: - * Tries to get a read lock. - * - * Returns FALSE if the lock is not held on return. 
- */ - -boolean_t -lock_try_read( - register lock_t * l) -{ - pc_t pc; - - simple_lock(&l->interlock); - - if (l->want_write || l->want_upgrade) { - simple_unlock(&l->interlock); - return(FALSE); - } - - l->read_count++; - - simple_unlock(&l->interlock); - - return(TRUE); -} -#endif /* Unused */ /* @@ -1049,7 +765,10 @@ void lck_rw_init( lck_rw_t *lck, lck_grp_t *grp, - __unused lck_attr_t *attr) { + lck_attr_t *attr) +{ + lck_attr_t *lck_attr = (attr != LCK_ATTR_NULL) ? + attr : &LockDefaultLckAttr; hw_lock_init(&lck->interlock); lck->want_write = FALSE; @@ -1057,6 +776,8 @@ lck_rw_init( lck->read_count = 0; lck->can_sleep = TRUE; lck->lck_rw_tag = 0; + lck->read_priority = (lck_attr->lck_attr_val & + LCK_ATTR_RW_SHARED_PRIORITY) != 0; lck_grp_reference(grp); lck_grp_lckcnt_incr(grp, LCK_TYPE_RW); @@ -1109,6 +830,21 @@ lck_interlock_unlock(lck_rw_t *lck, boolean_t istate) ml_set_interrupts_enabled(istate); } + +/* + * This inline is used when busy-waiting for an rw lock. + * If interrupts were disabled when the lock primitive was called, + * we poll the IPI handler for pending tlb flushes. + * XXX This is a hack to avoid deadlocking on the pmap_system_lock. 
+ */ +static inline void +lck_rw_lock_pause(boolean_t interrupts_enabled) +{ + if (!interrupts_enabled) + handle_pending_TLB_flushes(); + cpu_pause(); +} + /* * Routine: lck_rw_lock_exclusive */ @@ -1148,7 +884,7 @@ lck_rw_lock_exclusive( Debugger("timeout - want_write"); #endif /* MACH_LDEBUG */ while (--i != 0 && lck->want_write) - continue; + lck_rw_lock_pause(istate); istate = lck_interlock_lock(lck); } @@ -1188,7 +924,7 @@ lck_rw_lock_exclusive( #endif /* MACH_LDEBUG */ while (--i != 0 && (lck->read_count != 0 || lck->want_upgrade)) - continue; + lck_rw_lock_pause(istate); istate = lck_interlock_lock(lck); } @@ -1343,7 +1079,10 @@ lck_rw_lock_shared( #if MACH_LDEBUG decrementer = DECREMENTER_TIMEOUT; #endif /* MACH_LDEBUG */ - while (lck->want_write || lck->want_upgrade) { + while ((lck->want_write && (lck->read_priority ? + lck->read_count == 0 : TRUE)) || + lck->want_upgrade) { + i = lock_wait_time[lck->can_sleep ? 1 : 0]; KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SHARED_CODE) | DBG_FUNC_START, @@ -1355,12 +1094,18 @@ lck_rw_lock_shared( if (!--decrementer) Debugger("timeout - wait no writers"); #endif /* MACH_LDEBUG */ - while (--i != 0 && (lck->want_write || lck->want_upgrade)) - continue; + while (--i != 0 && + ((lck->want_write && (lck->read_priority ? + lck->read_count == 0 : TRUE)) || + lck->want_upgrade)) + lck_rw_lock_pause(istate); istate = lck_interlock_lock(lck); } - if (lck->can_sleep && (lck->want_write || lck->want_upgrade)) { + if (lck->can_sleep && + ((lck->want_write && (lck->read_priority ? 
+ lck->read_count == 0 : TRUE)) || + lck->want_upgrade)) { lck->waiting = TRUE; res = assert_wait((event_t) lck, THREAD_UNINT); if (res == THREAD_WAITING) { @@ -1449,7 +1194,7 @@ lck_rw_lock_shared_to_exclusive( Debugger("timeout - read_count"); #endif /* MACH_LDEBUG */ while (--i != 0 && lck->read_count != 0) - continue; + lck_rw_lock_pause(istate); istate = lck_interlock_lock(lck); } @@ -1699,6 +1444,73 @@ lck_mtx_assert( { } +/* + * Routine: lck_mtx_lock_spin + * + * Invoked trying to acquire a mutex when there is contention but + * the holder is running on another processor. We spin for up to a maximum + * time waiting for the lock to be released. + * + * Called with the interlock unlocked. + */ +void +lck_mtx_lock_spin( + lck_mtx_t *lck) +{ + thread_t holder; + lck_mtx_t *mutex; + uint64_t deadline; + + if (lck->lck_mtx_tag != LCK_MTX_TAG_INDIRECT) + mutex = lck; + else + mutex = &lck->lck_mtx_ptr->lck_mtx; + + KERNEL_DEBUG( + MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_SPIN) | DBG_FUNC_START, + (int)lck, (int)mutex->lck_mtx_locked, 0, 0, 0); + + deadline = mach_absolute_time() + MutexSpin; + /* + * Spin while: + * - mutex is locked, and + * - owner is running on another processor, and + * - owner is not in the idle delay, and + * - we haven't spun for long enough. + */ + while ((holder = (thread_t) mutex->lck_mtx_locked) != NULL && + (holder->machine.specFlags & OnProc) != 0 && + (holder->options & TH_OPT_DELAYIDLE) == 0 && + mach_absolute_time() < deadline) + cpu_pause(); +} + +/* + * Called from assembly code when a mutex interlock is held. + * We spin here re-checking the interlock but panic if we timeout. + * Note: here with interrupts disabled.
+ */ +void +lck_mtx_interlock_spin( + lck_mtx_t *lck) +{ + lck_mtx_t *mutex; + uint64_t deadline; + + if (lck->lck_mtx_tag != LCK_MTX_TAG_INDIRECT) + mutex = lck; + else + mutex = &lck->lck_mtx_ptr->lck_mtx; + + deadline = mach_absolute_time() + LockTimeOut; + while (mutex->lck_mtx_ilk != 0) { + cpu_pause(); + if (mach_absolute_time() > deadline) + panic("interlock timeout for mutex %p", lck); + } + +} + #if MACH_KDB void db_show_one_lock(lock_t *); @@ -1714,7 +1526,7 @@ db_show_one_lock( db_printf("%swaiting, %scan_sleep\n", lock->waiting ? "" : "!", lock->can_sleep ? "" : "!"); db_printf("Interlock:\n"); - db_show_one_simple_lock((db_expr_t)simple_lock_addr(lock->interlock), + db_show_one_simple_lock((db_expr_t) ((vm_offset_t)simple_lock_addr(lock->interlock)), TRUE, (db_expr_t)0, (char *)0); } @@ -1796,17 +1608,17 @@ _mutex_assert ( * fashion. */ -char *simple_lock_labels = "ENTRY ILK THREAD DURATION CALLER"; -char *mutex_labels = "ENTRY LOCKED WAITERS THREAD CALLER"; +const char *simple_lock_labels = "ENTRY ILK THREAD DURATION CALLER"; +const char *mutex_labels = "ENTRY LOCKED WAITERS THREAD CALLER"; void db_show_one_simple_lock ( db_expr_t addr, boolean_t have_addr, - db_expr_t count, - char * modif) + __unused db_expr_t count, + __unused char * modif) { - simple_lock_t saddr = (simple_lock_t)addr; + simple_lock_t saddr = (simple_lock_t) ((vm_offset_t) addr); if (saddr == (simple_lock_t)0 || !have_addr) { db_error ("No simple_lock\n"); @@ -1838,10 +1650,10 @@ void db_show_one_mutex ( db_expr_t addr, boolean_t have_addr, - db_expr_t count, - char * modif) + __unused db_expr_t count, + __unused char * modif) { - mutex_t * maddr = (mutex_t *)addr; + mutex_t * maddr = (mutex_t *)((vm_offset_t) addr); if (maddr == (mutex_t *)0 || !have_addr) db_error ("No mutex\n"); diff --git a/osfmk/i386/locore.s b/osfmk/i386/locore.s index a4ba462cc..2b6b1c718 100644 --- a/osfmk/i386/locore.s +++ b/osfmk/i386/locore.s @@ -65,10 +65,10 @@ #include #include -#include - -#define 
PREEMPT_DEBUG_LOG 0 +#define _ARCH_I386_ASM_HELP_H_ /* Prevent inclusion of user header */ +#include +#include /* * PTmap is recursive pagemap at top of virtual address space. @@ -102,6 +102,49 @@ #define CX(addr,reg) addr(,reg,4) +/* + * The following macros make calls into C code. + * They dynamically align the stack to 16 bytes. + * Arguments are moved (not pushed) onto the correctly aligned stack. + * NOTE: EDI is destroyed in the process, and hence cannot + * be directly used as a parameter. Users of this macro must + * independently preserve EDI (a non-volatile) if the routine is + * intended to be called from C, for instance. + */ + +#define CCALL(fn) \ + movl %esp, %edi ;\ + andl $0xFFFFFFF0, %esp ;\ + call EXT(fn) ;\ + movl %edi, %esp + +#define CCALL1(fn, arg1) \ + movl %esp, %edi ;\ + subl $4, %esp ;\ + andl $0xFFFFFFF0, %esp ;\ + movl arg1, 0(%esp) ;\ + call EXT(fn) ;\ + movl %edi, %esp + +#define CCALL2(fn, arg1, arg2) \ + movl %esp, %edi ;\ + subl $8, %esp ;\ + andl $0xFFFFFFF0, %esp ;\ + movl arg2, 4(%esp) ;\ + movl arg1, 0(%esp) ;\ + call EXT(fn) ;\ + movl %edi, %esp + +#define CCALL3(fn, arg1, arg2, arg3) \ + movl %esp, %edi ;\ + subl $12, %esp ;\ + andl $0xFFFFFFF0, %esp ;\ + movl arg3, 8(%esp) ;\ + movl arg2, 4(%esp) ;\ + movl arg1, 0(%esp) ;\ + call EXT(fn) ;\ + movl %edi, %esp + .text locore_start: @@ -111,7 +154,6 @@ locore_start: #ifdef __MACHO__ #define RECOVERY_SECTION .section __VECTORS, __recover -#define RETRY_SECTION .section __VECTORS, __retries #else #define RECOVERY_SECTION .text #define RECOVERY_SECTION .text @@ -137,34 +179,10 @@ LEXT(recover_table_end) ;\ .text /* - * Retry table for certain successful faults. 
- */ -#define RETRY_TABLE_START \ - .align 3; \ - .globl EXT(retry_table) ;\ -LEXT(retry_table) ;\ - .text - -#define RETRY(addr) \ - .align 3 ;\ - .long 9f ;\ - .long addr ;\ - .text ;\ -9: - -#define RETRY_TABLE_END \ - .align 3; \ - .globl EXT(retry_table_end) ;\ -LEXT(retry_table_end) ;\ - .text - -/* - * Allocate recovery and retry tables. + * Allocate recovery table. */ RECOVERY_SECTION RECOVER_TABLE_START - RETRY_SECTION - RETRY_TABLE_START /* * Timing routines. @@ -200,22 +218,30 @@ Entry(timer_grab) /* * Low 32-bits of nanotime returned in %eax. - * Computed from tsc using conversion scale/shift from per-cpu data. - * Uses %ecx and %edx. - */ -#define NANOTIME32 \ - pushl %esi /* save %esi */ ;\ - movl %gs:CPU_THIS,%esi /* per-cpu data ptr */ ;\ - addl $(CPU_RTC_NANOTIME),%esi /* esi -> per-cpu nanotime*/ ;\ - rdtsc /* edx:eax = tsc */ ;\ - subl RTN_TSC(%esi),%eax /* eax = (tsc - base_tsc) */ ;\ - mull RTN_SCALE(%esi) /* eax *= scale */ ;\ - movl RTN_SHIFT(%esi),%ecx /* ecx = shift */ ;\ - shrdl %cl,%edx,%eax /* edx:eax >> shift */ ;\ - andb $32,%cl /* shift == 32? */ ;\ - cmovnel %edx,%eax /* %eax = %edx if so */ ;\ - addl RTN_NANOS(%esi),%eax /* add base ns */ ;\ - popl %esi + * Computed from tsc based on the scale factor + * and an implicit 32 bit shift. + * + * Uses %esi, %edi, %ebx, %ecx and %edx. + */ +#define RNT_INFO _rtc_nanotime_info +#define NANOTIME32 \ +0: movl RNT_INFO+RNT_TSC_BASE,%esi ;\ + movl RNT_INFO+RNT_TSC_BASE+4,%edi ;\ + rdtsc ;\ + subl %esi,%eax /* tsc - tsc_base */ ;\ + sbbl %edi,%edx ;\ + movl RNT_INFO+RNT_SCALE,%ecx ;\ + movl %edx,%ebx /* delta * scale */ ;\ + mull %ecx ;\ + movl %ebx,%eax ;\ + movl %edx,%ebx ;\ + mull %ecx ;\ + addl %ebx,%eax ;\ + addl RNT_INFO+RNT_NS_BASE,%eax /* add ns_base */ ;\ + cmpl RNT_INFO+RNT_TSC_BASE,%esi ;\ + jne 0b ;\ + cmpl RNT_INFO+RNT_TSC_BASE+4,%edi ;\ + jne 0b /* * Add 32-bit ns delta in register dreg to timer pointed to by register treg.
@@ -231,8 +257,6 @@ Entry(timer_grab) * Add time delta to old timer and start new. */ #define TIMER_EVENT(old,new) \ - pushl %eax /* must be invariant */ ;\ - cli /* block interrupts */ ;\ NANOTIME32 /* eax low bits nanosecs */ ;\ movl %gs:CPU_PROCESSOR,%ecx /* get current processor */ ;\ movl CURRENT_TIMER(%ecx),%ecx /* get current timer */ ;\ @@ -242,25 +266,24 @@ Entry(timer_grab) addl $(new##_TIMER-old##_TIMER),%ecx /* point to new timer */ ;\ movl %edx,TIMER_TSTAMP(%ecx) /* set timestamp */ ;\ movl %gs:CPU_PROCESSOR,%edx /* get current processor */ ;\ - movl %ecx,CURRENT_TIMER(%edx) /* set current timer */ ;\ - sti /* interrupts on */ ;\ - popl %eax /* must be invariant */ + movl %ecx,CURRENT_TIMER(%edx) /* set current timer */ + /* * Update time on user trap entry. - * Uses %ecx,%edx. + * Uses %eax,%ecx,%edx,%esi. */ #define TIME_TRAP_UENTRY TIMER_EVENT(USER,SYSTEM) /* * update time on user trap exit. - * Uses %ecx,%edx. + * Uses %eax,%ecx,%edx,%esi. */ #define TIME_TRAP_UEXIT TIMER_EVENT(SYSTEM,USER) /* * update time on interrupt entry. - * Uses %eax,%ecx,%edx. + * Uses %eax,%ecx,%edx,%esi. */ #define TIME_INT_ENTRY \ NANOTIME32 /* eax low bits nanosecs */ ;\ @@ -275,7 +298,7 @@ Entry(timer_grab) /* * update time on interrupt exit. - * Uses %eax, %ecx, %edx. + * Uses %eax, %ecx, %edx, %esi. */ #define TIME_INT_EXIT \ NANOTIME32 /* eax low bits nanosecs */ ;\ @@ -290,75 +313,6 @@ Entry(timer_grab) #endif /* STAT_TIME */ -/* - * Encapsulate the transfer of exception stack frames between a PCB - * and a thread stack. Since the whole point of these is to emulate - * a call or exception that changes privilege level, both macros - * assume that there is no user esp or ss stored in the source - * frame (because there was no change of privilege to generate them). - */ - -/* - * Transfer a stack frame from a thread's user stack to its PCB. - * We assume the thread and stack addresses have been loaded into - * registers (our arguments). 
- * - * The macro overwrites edi, esi, ecx and whatever registers hold the - * thread and stack addresses (which can't be one of the above three). - * The thread address is overwritten with the address of its saved state - * (where the frame winds up). - * - * Must be called on kernel stack. - */ -#define FRAME_STACK_TO_PCB(thread, stkp) ;\ - movl ACT_PCB(thread),thread /* get act`s PCB */ ;\ - leal PCB_ISS(thread),%edi /* point to PCB`s saved state */;\ - movl %edi,thread /* save for later */ ;\ - movl stkp,%esi /* point to start of frame */ ;\ - movl $ R_UESP,%ecx ;\ - sarl $2,%ecx /* word count for transfer */ ;\ - cld /* we`re incrementing */ ;\ - rep ;\ - movsl /* transfer the frame */ ;\ - addl $ R_UESP,stkp /* derive true "user" esp */ ;\ - movl stkp,R_UESP(thread) /* store in PCB */ ;\ - movl $0,%ecx ;\ - mov %ss,%cx /* get current ss */ ;\ - movl %ecx,R_SS(thread) /* store in PCB */ - -/* - * Transfer a stack frame from a thread's PCB to the stack pointed - * to by the PCB. We assume the thread address has been loaded into - * a register (our argument). - * - * The macro overwrites edi, esi, ecx and whatever register holds the - * thread address (which can't be one of the above three). The - * thread address is overwritten with the address of its saved state - * (where the frame winds up). - * - * Must be called on kernel stack. 
- */ -#define FRAME_PCB_TO_STACK(thread) ;\ - movl ACT_PCB(thread),%esi /* get act`s PCB */ ;\ - leal PCB_ISS(%esi),%esi /* point to PCB`s saved state */;\ - movl R_UESP(%esi),%edi /* point to end of dest frame */;\ - movl ACT_MAP(thread),%ecx /* get act's map */ ;\ - movl MAP_PMAP(%ecx),%ecx /* get map's pmap */ ;\ - cmpl EXT(kernel_pmap), %ecx /* If kernel loaded task */ ;\ - jz 1f /* use kernel data segment */ ;\ - movl $ USER_DS,%cx /* else use user data segment */;\ - mov %cx,%es ;\ -1: ;\ - movl $ R_UESP,%ecx ;\ - subl %ecx,%edi /* derive start of frame */ ;\ - movl %edi,thread /* save for later */ ;\ - sarl $2,%ecx /* word count for transfer */ ;\ - cld /* we`re incrementing */ ;\ - rep ;\ - movsl /* transfer the frame */ ;\ - mov %ss,%cx /* restore kernel segments */ ;\ - mov %cx,%es - #undef PDEBUG #ifdef PDEBUG @@ -391,7 +345,7 @@ label/**/exit: #define CAH(label) #endif /* PDEBUG */ - + #if MACH_KDB /* * Last-ditch debug code to handle faults that might result @@ -452,13 +406,13 @@ Entry(db_task_gen_prot) */ Entry(db_task_start) movl %esp,%edx - subl $ISS_SIZE,%edx + subl $(ISS32_SIZE),%edx movl %edx,%esp /* allocate i386_saved_state on stack */ movl %eax,R_ERR(%esp) movl %ebx,R_TRAPNO(%esp) pushl %edx CPU_NUMBER(%edx) - movl CX(EXT(mp_dbtss),%edx),%edx + movl CX(EXT(master_dbtss),%edx),%edx movl TSS_LINK(%edx),%eax pushl %eax /* pass along selector of previous TSS */ call EXT(db_tss_to_frame) @@ -472,572 +426,149 @@ Entry(db_task_start) #endif /* MACH_KDB */ /* - * Trap/interrupt entry points. 
- * - * All traps must create the following save area on the PCB "stack": - * - * gs - * fs - * es - * ds - * edi - * esi - * ebp - * cr2 if page fault - otherwise unused - * ebx - * edx - * ecx - * eax - * trap number - * error code - * eip - * cs - * eflags - * user esp - if from user - * user ss - if from user - * es - if from V86 thread - * ds - if from V86 thread - * fs - if from V86 thread - * gs - if from V86 thread - * + * Called as a function, makes the current thread + * return from the kernel as if from an exception. */ -/* - * General protection or segment-not-present fault. - * Check for a GP/NP fault in the kernel_return - * sequence; if there, report it as a GP/NP fault on the user's instruction. - * - * esp-> 0: trap code (NP or GP) - * 4: segment number in error - * 8 eip - * 12 cs - * 16 eflags - * 20 old registers (trap is from kernel) - */ -Entry(t_gen_prot) - pushl $(T_GENERAL_PROTECTION) /* indicate fault type */ - jmp trap_check_kernel_exit /* check for kernel exit sequence */ - -Entry(t_segnp) - pushl $(T_SEGMENT_NOT_PRESENT) - /* indicate fault type */ - -trap_check_kernel_exit: - testl $(EFL_VM),16(%esp) /* is trap from V86 mode? */ - jnz EXT(alltraps) /* isn`t kernel trap if so */ - testl $3,12(%esp) /* is trap from kernel mode? */ - jne EXT(alltraps) /* if so: */ - /* check for the kernel exit sequence */ - cmpl $ EXT(kret_iret),8(%esp) /* on IRET? */ - je fault_iret - cmpl $ EXT(kret_popl_ds),8(%esp) /* popping DS? */ - je fault_popl_ds - cmpl $ EXT(kret_popl_es),8(%esp) /* popping ES? */ - je fault_popl_es - cmpl $ EXT(kret_popl_fs),8(%esp) /* popping FS? */ - je fault_popl_fs - cmpl $ EXT(kret_popl_gs),8(%esp) /* popping GS? */ - je fault_popl_gs -take_fault: /* if none of the above: */ - jmp EXT(alltraps) /* treat as normal trap. */ - -/* - * GP/NP fault on IRET: CS or SS is in error. - * All registers contain the user's values. 
- * - * on SP is - * 0 trap number - * 4 errcode - * 8 eip - * 12 cs --> trapno - * 16 efl --> errcode - * 20 user eip - * 24 user cs - * 28 user eflags - * 32 user esp - * 36 user ss - */ -fault_iret: - movl %eax,8(%esp) /* save eax (we don`t need saved eip) */ - popl %eax /* get trap number */ - movl %eax,12-4(%esp) /* put in user trap number */ - popl %eax /* get error code */ - movl %eax,16-8(%esp) /* put in user errcode */ - popl %eax /* restore eax */ - CAH(fltir) - jmp EXT(alltraps) /* take fault */ - -/* - * Fault restoring a segment register. The user's registers are still - * saved on the stack. The offending segment register has not been - * popped. - */ -fault_popl_ds: - popl %eax /* get trap number */ - popl %edx /* get error code */ - addl $12,%esp /* pop stack to user regs */ - jmp push_es /* (DS on top of stack) */ -fault_popl_es: - popl %eax /* get trap number */ - popl %edx /* get error code */ - addl $12,%esp /* pop stack to user regs */ - jmp push_fs /* (ES on top of stack) */ -fault_popl_fs: - popl %eax /* get trap number */ - popl %edx /* get error code */ - addl $12,%esp /* pop stack to user regs */ - jmp push_gs /* (FS on top of stack) */ -fault_popl_gs: - popl %eax /* get trap number */ - popl %edx /* get error code */ - addl $12,%esp /* pop stack to user regs */ - jmp push_segregs /* (GS on top of stack) */ - -push_es: - pushl %es /* restore es, */ -push_fs: - pushl %fs /* restore fs, */ -push_gs: - pushl %gs /* restore gs. */ -push_segregs: - movl %eax,R_TRAPNO(%esp) /* set trap number */ - movl %edx,R_ERR(%esp) /* set error code */ - CAH(fltpp) - jmp trap_set_segs /* take trap */ - -/* - * Debug trap. Check for single-stepping across system call into - * kernel. If this is the case, taking the debug trap has turned - * off single-stepping - save the flags register with the trace - * bit set. - */ -Entry(t_debug) - testl $(EFL_VM),8(%esp) /* is trap from V86 mode? 
*/ - jnz 0f /* isn`t kernel trap if so */ - testl $3,4(%esp) /* is trap from kernel mode? */ - jnz 0f /* if so: */ - cmpl $syscall_entry,(%esp) /* system call entry? */ - jne 1f /* if so: */ - /* flags are sitting where syscall */ - /* wants them */ - addl $8,%esp /* remove eip/cs */ - jmp syscall_entry_2 /* continue system call entry */ - -1: cmpl $trap_unix_addr,(%esp) - jne 0f - addl $8,%esp - jmp trap_unix_2 - -0: pushl $0 /* otherwise: */ - pushl $(T_DEBUG) /* handle as normal */ - jmp EXT(alltraps) /* debug fault */ + .globl EXT(thread_exception_return) + .globl EXT(thread_bootstrap_return) +LEXT(thread_exception_return) +LEXT(thread_bootstrap_return) + cli + movl %gs:CPU_KERNEL_STACK,%ecx + movl (%ecx),%esp /* switch back to PCB stack */ + jmp EXT(return_from_trap) -/* - * Page fault traps save cr2. - */ -Entry(t_page_fault) - pushl $(T_PAGE_FAULT) /* mark a page fault trap */ - pusha /* save the general registers */ - movl %cr2,%eax /* get the faulting address */ - movl %eax,12(%esp) /* save in esp save slot */ - jmp trap_push_segs /* continue fault */ +Entry(call_continuation) + movl S_ARG0,%eax /* get continuation */ + movl S_ARG1,%edx /* continuation param */ + movl S_ARG2,%ecx /* wait result */ + movl %gs:CPU_KERNEL_STACK,%esp /* pop the stack */ + xorl %ebp,%ebp /* zero frame pointer */ + subl $8,%esp /* align the stack */ + pushl %ecx + pushl %edx + call *%eax /* call continuation */ + addl $16,%esp + movl %gs:CPU_ACTIVE_THREAD,%eax + pushl %eax + call EXT(thread_terminate) + -/* - * All 'exceptions' enter here with: - * esp-> trap number - * error code - * old eip - * old cs - * old eflags - * old esp if trapped from user - * old ss if trapped from user + +/******************************************************************************************************* + * + * All 64 bit task 'exceptions' enter lo_alltraps: + * esp -> x86_saved_state_t + * + * The rest of the state is set up as: + * cr3 -> kernel directory + * esp -> low based stack + * gs -> 
CPU_DATA_GS + * cs -> KERNEL_CS + * ss/ds/es -> KERNEL_DS * - * NB: below use of CPU_NUMBER assumes that macro will use correct - * segment register for any kernel data accesses. - */ -Entry(alltraps) - pusha /* save the general registers */ -trap_push_segs: - pushl %ds /* save the segment registers */ - pushl %es - pushl %fs - pushl %gs - -trap_set_segs: - movl %ss,%ax - movl %ax,%ds - movl %ax,%es /* switch to kernel data seg */ - cld /* clear direction flag */ - testl $(EFL_VM),R_EFLAGS(%esp) /* in V86 mode? */ - jnz trap_from_user /* user mode trap if so */ - testb $3,R_CS(%esp) /* user mode trap? */ - jnz trap_from_user - cmpl $0,%gs:CPU_ACTIVE_KLOADED - je trap_from_kernel /* if clear, truly in kernel */ -#ifdef FIXME - cmpl ETEXT_ADDR,R_EIP(%esp) /* pc within kernel? */ - jb trap_from_kernel -#endif -trap_from_kloaded: - /* - * We didn't enter here "through" PCB (i.e., using ring 0 stack), - * so transfer the stack frame into the PCB explicitly, then - * start running on resulting "PCB stack". We have to set - * up a simulated "uesp" manually, since there's none in the - * frame. - */ - mov $ CPU_DATA_GS,%dx - mov %dx,%gs - CAH(atstart) - movl %gs:CPU_ACTIVE_KLOADED,%ebx - movl %gs:CPU_KERNEL_STACK,%eax - xchgl %esp,%eax - FRAME_STACK_TO_PCB(%ebx,%eax) - CAH(atend) - jmp EXT(take_trap) - -trap_from_user: - mov $ CPU_DATA_GS,%ax - mov %ax,%gs - + * interrupts disabled + * direction flag cleared + */ +Entry(lo_alltraps) + movl R_CS(%esp),%eax /* assume 32-bit state */ + cmpl $(SS_64),SS_FLAVOR(%esp)/* 64-bit? 
*/ + jne 1f + movl R64_CS(%esp),%eax /* 64-bit user mode */ +1: + testb $3,%eax + jz trap_from_kernel + /* user mode trap */ TIME_TRAP_UENTRY movl %gs:CPU_KERNEL_STACK,%ebx - xchgl %ebx,%esp /* switch to kernel stack */ - /* user regs pointer already set */ -LEXT(take_trap) - pushl %ebx /* record register save area */ - pushl %ebx /* pass register save area to trap */ - call EXT(user_trap) /* call user trap routine */ - movl 4(%esp),%esp /* switch back to PCB stack */ + xchgl %ebx,%esp /* switch to kernel stack */ + sti + + CCALL1(user_trap, %ebx) /* call user trap routine */ + cli /* hold off intrs - critical section */ + popl %esp /* switch back to PCB stack */ /* * Return from trap or system call, checking for ASTs. - * On PCB stack. - */ - + * On lowbase PCB stack with intrs disabled + */ LEXT(return_from_trap) - movl %gs:CPU_PENDING_AST,%edx - cmpl $0,%edx - je EXT(return_to_user) /* if we need an AST: */ - - movl %gs:CPU_KERNEL_STACK,%esp - /* switch to kernel stack */ - pushl $0 /* push preemption flag */ - call EXT(i386_astintr) /* take the AST */ - addl $4,%esp /* pop preemption flag */ + movl %gs:CPU_PENDING_AST,%eax + testl %eax,%eax + je EXT(return_to_user) /* branch if no AST */ + + movl %gs:CPU_KERNEL_STACK,%ebx + xchgl %ebx,%esp /* switch to kernel stack */ + sti /* interrupts always enabled on return to user mode */ + + pushl %ebx /* save PCB stack */ + CCALL1(i386_astintr, $0) /* take the AST */ + cli popl %esp /* switch back to PCB stack (w/exc link) */ jmp EXT(return_from_trap) /* and check again (rare) */ - /* ASTs after this point will */ - /* have to wait */ -/* - * Arrange the checks needed for kernel-loaded (or kernel-loading) - * threads so that branch is taken in kernel-loaded case. 
- */ LEXT(return_to_user) TIME_TRAP_UEXIT - cmpl $0,%gs:CPU_ACTIVE_KLOADED - jnz EXT(return_xfer_stack) - movl %gs:CPU_ACTIVE_THREAD, %ebx /* get active thread */ - -#if MACH_RT -#if MACH_ASSERT - cmpl $0,%gs:CPU_PREEMPTION_LEVEL - je EXT(return_from_kernel) - int $3 -#endif /* MACH_ASSERT */ -#endif /* MACH_RT */ - -/* - * Return from kernel mode to interrupted thread. - */ - -LEXT(return_from_kernel) -LEXT(kret_popl_gs) - popl %gs /* restore segment registers */ -LEXT(kret_popl_fs) - popl %fs -LEXT(kret_popl_es) - popl %es -LEXT(kret_popl_ds) - popl %ds - popa /* restore general registers */ - addl $8,%esp /* discard trap number and error code */ - -LEXT(kret_iret) - iret /* return from interrupt */ +LEXT(ret_to_user) + cmpl $0, %gs:CPU_IS64BIT + je EXT(lo_ret_to_user) + jmp EXT(lo64_ret_to_user) -LEXT(return_xfer_stack) - /* - * If we're on PCB stack in a kernel-loaded task, we have - * to transfer saved state back to thread stack and swap - * stack pointers here, because the hardware's not going - * to do so for us. - */ - CAH(rxsstart) - movl %gs:CPU_KERNEL_STACK,%esp - movl %gs:CPU_ACTIVE_KLOADED,%eax - FRAME_PCB_TO_STACK(%eax) - movl %eax,%esp - CAH(rxsend) - jmp EXT(return_from_kernel) - -/* - * Hate to put this here, but setting up a separate swap_func for - * kernel-loaded threads no longer works, since thread executes - * "for a while" (i.e., until it reaches glue code) when first - * created, even if it's nominally suspended. Hence we can't - * transfer the PCB when the thread first resumes, because we - * haven't initialized it yet. - */ -/* - * Have to force transfer to new stack "manually". Use a string - * move to transfer all of our saved state to the stack pointed - * to by iss.uesp, then install a pointer to it as our current - * stack pointer. 
- */ -LEXT(return_kernel_loading) - movl %gs:CPU_KERNEL_STACK,%esp - movl %gs:CPU_ACTIVE_THREAD, %ebx /* get active thread */ - movl %ebx,%edx /* save for later */ - FRAME_PCB_TO_STACK(%ebx) - movl %ebx,%esp /* start running on new stack */ - movl $0,%gs:CPU_ACTIVE_KLOADED /* set cached indicator */ - jmp EXT(return_from_kernel) + /* - * Trap from kernel mode. No need to switch stacks or load segment registers. + * Trap from kernel mode. No need to switch stacks. + * Interrupts must be off here - we will set them to state at time of trap + * as soon as it's safe for us to do so and not recurse doing preemption */ trap_from_kernel: -#if MACH_KDB || MACH_KGDB - mov $ CPU_DATA_GS,%ax - mov %ax,%gs - movl %esp,%ebx /* save current stack */ - - cmpl EXT(int_stack_high),%esp /* on an interrupt stack? */ - jb 6f /* OK if so */ - -#if MACH_KGDB - cmpl $0,EXT(kgdb_active) /* Unexpected trap in kgdb */ - je 0f /* no */ - - pushl %esp /* Already on kgdb stack */ - cli - call EXT(kgdb_trap) - addl $4,%esp - jmp EXT(return_from_kernel) -0: /* should kgdb handle this exception? */ - cmpl $(T_NO_FPU),R_TRAPNO(%esp) /* FPU disabled? */ - je 2f /* yes */ - cmpl $(T_PAGE_FAULT),R_TRAPNO(%esp) /* page fault? */ - je 2f /* yes */ -1: - cli /* disable interrupts */ - CPU_NUMBER(%edx) /* get CPU number */ - movl CX(EXT(kgdb_stacks),%edx),%ebx - xchgl %ebx,%esp /* switch to kgdb stack */ - pushl %ebx /* pass old sp as an arg */ - call EXT(kgdb_from_kernel) - popl %esp /* switch back to kernel stack */ - jmp EXT(return_from_kernel) -2: -#endif /* MACH_KGDB */ - -#if MACH_KDB - cmpl $0,EXT(db_active) /* could trap be from ddb? */ - je 3f /* no */ - CPU_NUMBER(%edx) /* see if this CPU is in ddb */ - cmpl $0,CX(EXT(kdb_active),%edx) - je 3f /* no */ - pushl %esp - call EXT(db_trap_from_asm) - addl $0x4,%esp - jmp EXT(return_from_kernel) - -3: - /* - * Dilemma: don't want to switch to kernel_stack if trap - * "belongs" to ddb; don't want to switch to db_stack if - * trap "belongs" to kernel. 
So have to duplicate here the - * set of trap types that kernel_trap() handles. Note that - * "unexpected" page faults will not be handled by kernel_trap(). - * In this panic-worthy case, we fall into the debugger with - * kernel_stack containing the call chain that led to the - * bogus fault. - */ - movl R_TRAPNO(%esp),%edx - cmpl $(T_PAGE_FAULT),%edx - je 4f - cmpl $(T_NO_FPU),%edx - je 4f - cmpl $(T_FPU_FAULT),%edx - je 4f - cmpl $(T_FLOATING_POINT_ERROR),%edx - je 4f - cmpl $(T_PREEMPT),%edx - jne 7f -4: -#endif /* MACH_KDB */ + movl %esp, %eax /* saved state addr */ + CCALL1(kernel_trap, %eax) /* to kernel trap routine */ + cli - cmpl %gs:CPU_KERNEL_STACK,%esp - /* if not already on kernel stack, */ - ja 5f /* check some more */ - cmpl %gs:CPU_ACTIVE_STACK,%esp - ja 6f /* on kernel stack: no switch */ -5: - movl %gs:CPU_KERNEL_STACK,%esp -6: - pushl %ebx /* save old stack */ - pushl %ebx /* pass as parameter */ - call EXT(kernel_trap) /* to kernel trap routine */ - addl $4,%esp /* pop parameter */ - testl %eax,%eax - jne 8f - /* - * If kernel_trap returns false, trap wasn't handled. - */ -7: -#if MACH_KDB - CPU_NUMBER(%edx) - movl CX(EXT(db_stacks),%edx),%esp - pushl %ebx /* pass old stack as parameter */ - call EXT(db_trap_from_asm) -#endif /* MACH_KDB */ -#if MACH_KGDB - cli /* disable interrupts */ - CPU_NUMBER(%edx) /* get CPU number */ - movl CX(EXT(kgdb_stacks),%edx),%esp - pushl %ebx /* pass old stack as parameter */ - call EXT(kgdb_from_kernel) -#endif /* MACH_KGDB */ - addl $4,%esp /* pop parameter */ - testl %eax,%eax - jne 8f - /* - * Likewise, if kdb_trap/kgdb_from_kernel returns false, trap - * wasn't handled. 
- */ - pushl %ebx /* pass old stack as parameter */ - call EXT(panic_trap) - addl $4,%esp /* pop parameter */ -8: - movl %ebx,%esp /* get old stack (from callee-saves reg) */ -#else /* MACH_KDB || MACH_KGDB */ - pushl %esp /* pass parameter */ - call EXT(kernel_trap) /* to kernel trap routine */ - addl $4,%esp /* pop parameter */ -#endif /* MACH_KDB || MACH_KGDB */ - -#if MACH_RT movl %gs:CPU_PENDING_AST,%eax /* get pending asts */ testl $ AST_URGENT,%eax /* any urgent preemption? */ - je EXT(return_from_kernel) /* no, nothing to do */ - cmpl $ T_PREEMPT,48(%esp) /* preempt request? */ - jne EXT(return_from_kernel) /* no, nothing to do */ + je ret_to_kernel /* no, nothing to do */ + cmpl $ T_PREEMPT,R_TRAPNO(%esp) + je ret_to_kernel /* T_PREEMPT handled in kernel_trap() */ + testl $ EFL_IF,R_EFLAGS(%esp) /* interrupts disabled? */ + je ret_to_kernel + cmpl $0,%gs:CPU_PREEMPTION_LEVEL /* preemption disabled? */ + jne ret_to_kernel movl %gs:CPU_KERNEL_STACK,%eax movl %esp,%ecx xorl %eax,%ecx andl $(-KERNEL_STACK_SIZE),%ecx testl %ecx,%ecx /* are we on the kernel stack? */ - jne EXT(return_from_kernel) /* no, skip it */ - -#if PREEMPT_DEBUG_LOG - pushl 28(%esp) /* stack pointer */ - pushl 24+4(%esp) /* frame pointer */ - pushl 56+8(%esp) /* stack pointer */ - pushl $0f - call EXT(log_thread_action) - addl $16, %esp - .data -0: String "trap preempt eip" - .text -#endif /* PREEMPT_DEBUG_LOG */ - - pushl $1 /* push preemption flag */ - call EXT(i386_astintr) /* take the AST */ - addl $4,%esp /* pop preemption flag */ -#endif /* MACH_RT */ - - jmp EXT(return_from_kernel) + jne ret_to_kernel /* no, skip it */ -/* - * Called as a function, makes the current thread - * return from the kernel as if from an exception. 
- */ + CCALL1(i386_astintr, $1) /* take the AST */ - .globl EXT(thread_exception_return) - .globl EXT(thread_bootstrap_return) -LEXT(thread_exception_return) -LEXT(thread_bootstrap_return) - movl %esp,%ecx /* get kernel stack */ - or $(KERNEL_STACK_SIZE-1),%ecx - movl -3-IKS_SIZE(%ecx),%esp /* switch back to PCB stack */ - jmp EXT(return_from_trap) +ret_to_kernel: + cmpl $0, %gs:CPU_IS64BIT + je EXT(lo_ret_to_kernel) + jmp EXT(lo64_ret_to_kernel) -Entry(call_continuation) - movl S_ARG0,%eax /* get continuation */ - movl S_ARG1,%edx /* continuation param */ - movl S_ARG2,%ecx /* wait result */ - movl %esp,%ebp /* get kernel stack */ - or $(KERNEL_STACK_SIZE-1),%ebp - addl $(-3-IKS_SIZE),%ebp - movl %ebp,%esp /* pop the stack */ - xorl %ebp,%ebp /* zero frame pointer */ - pushl %ecx - pushl %edx - call *%eax /* call continuation */ - addl $8,%esp - movl %gs:CPU_ACTIVE_THREAD,%eax - pushl %eax - call EXT(thread_terminate) -#if 0 -#define LOG_INTERRUPT(info,msg) \ - pushal ; \ - pushl msg ; \ - pushl info ; \ - call EXT(log_thread_action) ; \ - add $8,%esp ; \ - popal -#define CHECK_INTERRUPT_TIME(n) \ - pushal ; \ - pushl $n ; \ - call EXT(check_thread_time) ; \ - add $4,%esp ; \ - popal -#else -#define LOG_INTERRUPT(info,msg) -#define CHECK_INTERRUPT_TIME(n) -#endif - -.data -imsg_start: - String "interrupt start" -imsg_end: - String "interrupt end" - -.text -/* - * All interrupts enter here. - * old %eax on stack; interrupt number in %eax. 
- */ -Entry(all_intrs) - pushl %ecx /* save registers */ - pushl %edx - cld /* clear direction flag */ - - pushl %ds /* save segment registers */ - pushl %es - pushl %fs - pushl %gs - mov %ss,%dx /* switch to kernel segments */ - mov %dx,%ds - mov %dx,%es - mov $ CPU_DATA_GS,%dx - mov %dx,%gs +/******************************************************************************************************* + * + * All interrupts on all tasks enter here with: + * esp-> -> x86_saved_state_t + * + * cr3 -> kernel directory + * esp -> low based stack + * gs -> CPU_DATA_GS + * cs -> KERNEL_CS + * ss/ds/es -> KERNEL_DS + * + * interrupts disabled + * direction flag cleared + */ +Entry(lo_allintrs) /* * test whether already on interrupt stack */ @@ -1047,710 +578,319 @@ Entry(all_intrs) leal -INTSTACK_SIZE(%ecx),%edx cmpl %esp,%edx jb int_from_intstack -1: - movl %esp,%edx /* & i386_interrupt_state */ +1: xchgl %ecx,%esp /* switch to interrupt stack */ + movl %cr0,%eax /* get cr0 */ + orl $(CR0_TS),%eax /* or in TS bit */ + movl %eax,%cr0 /* set cr0 */ + + subl $8, %esp /* for 16-byte stack alignment */ pushl %ecx /* save pointer to old stack */ - pushl %edx /* pass &i386_interrupt_state to pe_incoming_interrupt */ - pushl %eax /* push trap number */ + movl %ecx,%gs:CPU_INT_STATE /* save intr state */ TIME_INT_ENTRY /* do timing */ -#if MACH_RT incl %gs:CPU_PREEMPTION_LEVEL -#endif /* MACH_RT */ incl %gs:CPU_INTERRUPT_LEVEL - call EXT(PE_incoming_interrupt) /* call generic interrupt routine */ - addl $8,%esp /* Pop trap number and eip */ + movl %gs:CPU_INT_STATE, %eax + CCALL1(PE_incoming_interrupt, %eax) /* call generic interrupt routine */ + + cli /* just in case we returned with intrs enabled */ + xorl %eax,%eax + movl %eax,%gs:CPU_INT_STATE /* clear intr state pointer */ .globl EXT(return_to_iret) LEXT(return_to_iret) /* (label for kdb_kintr and hardclock) */ decl %gs:CPU_INTERRUPT_LEVEL - -#if MACH_RT decl %gs:CPU_PREEMPTION_LEVEL -#endif /* MACH_RT */ TIME_INT_EXIT /* do 
timing */ + movl %gs:CPU_ACTIVE_THREAD,%eax + movl ACT_PCB(%eax),%eax /* get act`s PCB */ + movl PCB_FPS(%eax),%eax /* get pcb's ims.ifps */ + cmpl $0,%eax /* Is there a context */ + je 1f /* Branch if not */ + movl FP_VALID(%eax),%eax /* Load fp_valid */ + cmpl $0,%eax /* Check if valid */ + jne 1f /* Branch if valid */ + clts /* Clear TS */ + jmp 2f +1: + movl %cr0,%eax /* get cr0 */ + orl $(CR0_TS),%eax /* or in TS bit */ + movl %eax,%cr0 /* set cr0 */ +2: popl %esp /* switch back to old stack */ - movl %gs:CPU_PENDING_AST,%eax - testl %eax,%eax /* any pending asts? */ - je 1f /* no, nothing to do */ - testl $(EFL_VM),I_EFL(%esp) /* if in V86 */ - jnz ast_from_interrupt /* take it */ - testb $3,I_CS(%esp) /* user mode, */ - jnz ast_from_interrupt /* take it */ -#ifdef FIXME - cmpl ETEXT_ADDR,I_EIP(%esp) /* if within kernel-loaded task, */ - jnb ast_from_interrupt /* take it */ -#endif + /* Load interrupted code segment into %eax */ + movl R_CS(%esp),%eax /* assume 32-bit state */ + cmpl $(SS_64),SS_FLAVOR(%esp)/* 64-bit? */ + jne 3f + movl R64_CS(%esp),%eax /* 64-bit user mode */ +3: + testb $3,%eax /* user mode, */ + jnz ast_from_interrupt_user /* go handle potential ASTs */ + /* + * we only want to handle preemption requests if + * the interrupt fell in the kernel context + * and preemption isn't disabled + */ + movl %gs:CPU_PENDING_AST,%eax + testl $ AST_URGENT,%eax /* any urgent requests? */ + je ret_to_kernel /* no, nothing to do */ + + cmpl $0,%gs:CPU_PREEMPTION_LEVEL /* preemption disabled? */ + jne ret_to_kernel /* yes, skip it */ -#if MACH_RT - cmpl $0,%gs:CPU_PREEMPTION_LEVEL /* preemption masked? */ - jne 1f /* yes, skip it */ - testl $ AST_URGENT,%eax /* any urgent requests? */ - je 1f /* no, skip it */ - cmpl $ EXT(locore_end),I_EIP(%esp) /* are we in locore code? */ - jb 1f /* yes, skip it */ movl %gs:CPU_KERNEL_STACK,%eax movl %esp,%ecx xorl %eax,%ecx andl $(-KERNEL_STACK_SIZE),%ecx - testl %ecx,%ecx /* are we on the kernel stack? 
*/ - jne 1f /* no, skip it */ + testl %ecx,%ecx /* are we on the kernel stack? */ + jne ret_to_kernel /* no, skip it */ -/* - * Take an AST from kernel space. We don't need (and don't want) - * to do as much as the case where the interrupt came from user - * space. - */ -#if PREEMPT_DEBUG_LOG - pushl $0 - pushl $0 - pushl I_EIP+8(%esp) - pushl $0f - call EXT(log_thread_action) - addl $16, %esp - .data -0: String "intr preempt eip" - .text -#endif /* PREEMPT_DEBUG_LOG */ + /* + * Take an AST from kernel space. We don't need (and don't want) + * to do as much as the case where the interrupt came from user + * space. + */ + CCALL1(i386_astintr, $1) - sti - pushl $1 /* push preemption flag */ - call EXT(i386_astintr) /* take the AST */ - addl $4,%esp /* pop preemption flag */ -#endif /* MACH_RT */ + jmp ret_to_kernel -1: - pop %gs - pop %fs - pop %es /* restore segment regs */ - pop %ds - pop %edx - pop %ecx - pop %eax - iret /* return to caller */ +/* + * nested int - simple path, can't preempt etc on way out + */ int_from_intstack: -#if MACH_RT incl %gs:CPU_PREEMPTION_LEVEL -#endif /* MACH_RT */ - incl %gs:CPU_INTERRUPT_LEVEL - movl %esp, %edx /* i386_interrupt_state */ - pushl %edx /* pass &i386_interrupt_state to PE_incoming_interrupt /* - - pushl %eax /* Push trap number */ - - call EXT(PE_incoming_interrupt) - addl $20,%esp /* pop i386_interrupt_state, gs,fs,es,ds */ - -LEXT(return_to_iret_i) /* ( label for kdb_kintr) */ - - addl $4,%esp /* pop trap number */ + movl %esp, %edx /* i386_saved_state */ + CCALL1(PE_incoming_interrupt, %edx) decl %gs:CPU_INTERRUPT_LEVEL - -#if MACH_RT decl %gs:CPU_PREEMPTION_LEVEL -#endif /* MACH_RT */ - pop %edx /* must have been on kernel segs */ - pop %ecx - pop %eax /* no ASTs */ - iret + jmp ret_to_kernel /* - * Take an AST from an interrupt. - * On PCB stack. 
- * sp-> es -> edx - * ds -> ecx - * edx -> eax - * ecx -> trapno - * eax -> code - * eip - * cs - * efl - * esp - * ss - */ -ast_from_interrupt: - pop %gs - pop %fs - pop %es /* restore all registers ... */ - pop %ds - popl %edx - popl %ecx - popl %eax - sti /* Reenable interrupts */ - pushl $0 /* zero code */ - pushl $0 /* zero trap number */ - pusha /* save general registers */ - push %ds /* save segment registers */ - push %es - push %fs - push %gs - mov %ss,%dx /* switch to kernel segments */ - mov %dx,%ds - mov %dx,%es - mov $ CPU_DATA_GS,%dx - mov %dx,%gs - - /* - * See if we interrupted a kernel-loaded thread executing - * in its own task. - */ - CPU_NUMBER(%edx) - testl $(EFL_VM),R_EFLAGS(%esp) /* in V86 mode? */ - jnz 0f /* user mode trap if so */ - testb $3,R_CS(%esp) - jnz 0f /* user mode, back to normal */ -#ifdef FIXME - cmpl ETEXT_ADDR,R_EIP(%esp) - jb 0f /* not kernel-loaded, back to normal */ -#endif + * Take an AST from an interrupted user + */ +ast_from_interrupt_user: + movl %gs:CPU_PENDING_AST,%eax + testl %eax,%eax /* pending ASTs? */ + je EXT(ret_to_user) /* no, nothing to do */ - /* - * Transfer the current stack frame by hand into the PCB. - */ - CAH(afistart) - movl %gs:CPU_ACTIVE_KLOADED,%eax - movl %gs:CPU_KERNEL_STACK,%ebx - xchgl %ebx,%esp - FRAME_STACK_TO_PCB(%eax,%ebx) - CAH(afiend) - TIME_TRAP_UENTRY - jmp 3f -0: TIME_TRAP_UENTRY - movl %gs:CPU_KERNEL_STACK,%eax - /* switch to kernel stack */ - xchgl %eax,%esp -3: - pushl %eax - pushl $0 /* push preemption flag */ - call EXT(i386_astintr) /* take the AST */ - addl $4,%esp /* pop preemption flag */ - popl %esp /* back to PCB stack */ jmp EXT(return_from_trap) /* return */ -#if MACH_KDB || MACH_KGDB -/* - * kdb_kintr: enter kdb from keyboard interrupt. - * Chase down the stack frames until we find one whose return - * address is the interrupt handler. 
At that point, we have: - * - * frame-> saved %ebp - * return address in interrupt handler - * ivect - * saved SPL - * return address == return_to_iret_i - * saved %edx - * saved %ecx - * saved %eax - * saved %eip - * saved %cs - * saved %efl + +/******************************************************************************************************* * - * OR: - * frame-> saved %ebp - * return address in interrupt handler - * ivect - * saved SPL - * return address == return_to_iret - * pointer to save area on old stack - * [ saved %ebx, if accurate timing ] + * 32bit Tasks + * System call entries via INTR_GATE or sysenter: * - * old stack: saved %es - * saved %ds - * saved %edx - * saved %ecx - * saved %eax - * saved %eip - * saved %cs - * saved %efl + * esp -> i386_saved_state_t + * cr3 -> kernel directory + * esp -> low based stack + * gs -> CPU_DATA_GS + * cs -> KERNEL_CS + * ss/ds/es -> KERNEL_DS * - * Call kdb, passing it that register save area. - */ - -#if MACH_KGDB -Entry(kgdb_kintr) -#endif /* MACH_KGDB */ -#if MACH_KDB -Entry(kdb_kintr) -#endif /* MACH_KDB */ - movl %ebp,%eax /* save caller`s frame pointer */ - movl $ EXT(return_to_iret),%ecx /* interrupt return address 1 */ - movl $ EXT(return_to_iret_i),%edx /* interrupt return address 2 */ - -0: cmpl 16(%eax),%ecx /* does this frame return to */ - /* interrupt handler (1)? */ - je 1f - cmpl $kdb_from_iret,16(%eax) - je 1f - cmpl 16(%eax),%edx /* interrupt handler (2)? 
*/ - je 2f /* if not: */ - cmpl $kdb_from_iret_i,16(%eax) - je 2f - movl (%eax),%eax /* try next frame */ - jmp 0b - -1: movl $kdb_from_iret,16(%eax) /* returns to kernel/user stack */ - ret - -2: movl $kdb_from_iret_i,16(%eax) - /* returns to interrupt stack */ - ret - -/* - * On return from keyboard interrupt, we will execute - * kdb_from_iret_i - * if returning to an interrupt on the interrupt stack - * kdb_from_iret - * if returning to an interrupt on the user or kernel stack - */ -kdb_from_iret: - /* save regs in known locations */ - pushl %ebx /* caller`s %ebx is in reg */ - pushl %ebp - pushl %esi - pushl %edi - push %fs - push %gs -#if MACH_KGDB - cli - pushl %esp /* pass regs */ - call EXT(kgdb_kentry) /* to kgdb */ - addl $4,%esp /* pop parameters */ -#endif /* MACH_KGDB */ -#if MACH_KDB - pushl %esp /* pass regs */ - call EXT(kdb_kentry) /* to kdb */ - addl $4,%esp /* pop parameters */ -#endif /* MACH_KDB */ - pop %gs /* restore registers */ - pop %fs - popl %edi - popl %esi - popl %ebp - popl %ebx - jmp EXT(return_to_iret) /* normal interrupt return */ - -kdb_from_iret_i: /* on interrupt stack */ - pop %edx /* restore saved registers */ - pop %ecx - pop %eax - pushl $0 /* zero error code */ - pushl $0 /* zero trap number */ - pusha /* save general registers */ - push %ds /* save segment registers */ - push %es - push %fs - push %gs -#if MACH_KGDB - cli /* disable interrupts */ - CPU_NUMBER(%edx) /* get CPU number */ - movl CX(EXT(kgdb_stacks),%edx),%ebx - xchgl %ebx,%esp /* switch to kgdb stack */ - pushl %ebx /* pass old sp as an arg */ - call EXT(kgdb_from_kernel) - popl %esp /* switch back to interrupt stack */ -#endif /* MACH_KGDB */ -#if MACH_KDB - pushl %esp /* pass regs, */ - pushl $0 /* code, */ - pushl $-1 /* type to kdb */ - call EXT(kdb_trap) - addl $12,%esp -#endif /* MACH_KDB */ - pop %gs /* restore segment registers */ - pop %fs - pop %es - pop %ds - popa /* restore general registers */ - addl $8,%esp - iret - -#endif /* MACH_KDB || 
MACH_KGDB */ - - -/* - * Mach RPC enters through a call gate, like a system call. - */ - -Entry(mach_rpc) - pushf /* save flags as soon as possible */ - pushl %eax /* save system call number */ - pushl $0 /* clear trap number slot */ - - pusha /* save the general registers */ - pushl %ds /* and the segment registers */ - pushl %es - pushl %fs - pushl %gs - - mov %ss,%dx /* switch to kernel data segment */ - mov %dx,%ds - mov %dx,%es - mov $ CPU_DATA_GS,%dx - mov %dx,%gs - -/* - * Shuffle eflags,eip,cs into proper places + * interrupts disabled + * direction flag cleared */ - movl R_EIP(%esp),%ebx /* eflags are in EIP slot */ - movl R_CS(%esp),%ecx /* eip is in CS slot */ - movl R_EFLAGS(%esp),%edx /* cs is in EFLAGS slot */ - movl %ecx,R_EIP(%esp) /* fix eip */ - movl %edx,R_CS(%esp) /* fix cs */ - movl %ebx,R_EFLAGS(%esp) /* fix eflags */ - +Entry(lo_sysenter) + /* + * We can be here either for a mach syscall or a unix syscall, + * as indicated by the sign of the code: + */ + movl R_EAX(%esp),%eax + testl %eax,%eax + js EXT(lo_mach_scall) /* < 0 => mach */ + /* > 0 => unix */ + +Entry(lo_unix_scall) TIME_TRAP_UENTRY - negl %eax /* get system call number */ - shll $4,%eax /* manual indexing */ - -/* - * Check here for mach_rpc from kernel-loaded task -- - * - Note that kernel-loaded task returns via real return. - * We didn't enter here "through" PCB (i.e., using ring 0 stack), - * so transfer the stack frame into the PCB explicitly, then - * start running on resulting "PCB stack". We have to set - * up a simulated "uesp" manually, since there's none in the - * frame. - */ - cmpl $0,%gs:CPU_ACTIVE_KLOADED - jz 2f - CAH(mrstart) - movl %gs:CPU_ACTIVE_KLOADED,%ebx - movl %gs:CPU_KERNEL_STACK,%edx - xchgl %edx,%esp - - FRAME_STACK_TO_PCB(%ebx,%edx) - CAH(mrend) - - jmp 3f - -2: movl %gs:CPU_KERNEL_STACK,%ebx - /* get current kernel stack */ - xchgl %ebx,%esp /* switch stacks - %ebx points to */ - /* user registers. 
*/ - -3: - -/* - * Register use on entry: - * eax contains syscall number - * ebx contains user regs pointer - */ -#undef RPC_TRAP_REGISTERS -#ifdef RPC_TRAP_REGISTERS - pushl R_ESI(%ebx) - pushl R_EDI(%ebx) - pushl R_ECX(%ebx) - pushl R_EDX(%ebx) -#else - movl EXT(mach_trap_table)(%eax),%ecx - /* get number of arguments */ - jecxz 2f /* skip argument copy if none */ - movl R_UESP(%ebx),%esi /* get user stack pointer */ - lea 4(%esi,%ecx,4),%esi /* skip user return address, */ - /* and point past last argument */ - movl %gs:CPU_ACTIVE_KLOADED,%edx - /* point to current thread */ - orl %edx,%edx /* if ! kernel-loaded, check addr */ - jz 4f /* else */ - mov %ds,%dx /* kernel data segment access */ - jmp 5f -4: - cmpl $(VM_MAX_ADDRESS),%esi /* in user space? */ - ja mach_call_addr /* address error if not */ - movl $ USER_DS,%edx /* user data segment access */ -5: - mov %dx,%fs - movl %esp,%edx /* save kernel ESP for error recovery */ -1: - subl $4,%esi - RECOVERY_SECTION - RECOVER(mach_call_addr_push) - pushl %fs:(%esi) /* push argument on stack */ - loop 1b /* loop for all arguments */ -#endif + xchgl %ebx,%esp /* switch to kernel stack */ -/* - * Register use on entry: - * eax contains syscall number << 4 - * mach_call_munger is declared regparm(1), so the first arg is %eax - */ -2: + sti + movl %gs:CPU_ACTIVE_THREAD,%ecx /* get current thread */ + movl ACT_TASK(%ecx),%ecx /* point to current task */ + addl $1,TASK_SYSCALLS_UNIX(%ecx) /* increment call count */ - call EXT(mach_call_munger) + CCALL1(unix_syscall, %ebx) + /* + * always returns through thread_exception_return + */ - movl %esp,%ecx /* get kernel stack */ - or $(KERNEL_STACK_SIZE-1),%ecx - movl -3-IKS_SIZE(%ecx),%esp /* switch back to PCB stack */ - movl %eax,R_EAX(%esp) /* save return value */ - jmp EXT(return_from_trap) /* return to user */ - -/* - * Special system call entry for "int 0x80", which has the "eflags" - * register saved at the right place already. 
- * Fall back to the common syscall path after saving the registers. - * - * esp -> old eip - * old cs - * old eflags - * old esp if trapped from user - * old ss if trapped from user - * - * XXX: for the moment, we don't check for int 0x80 from kernel mode. - */ -Entry(syscall_int80) - pushl %eax /* save system call number */ - pushl $0 /* clear trap number slot */ +Entry(lo_mach_scall) + TIME_TRAP_UENTRY - pusha /* save the general registers */ - pushl %ds /* and the segment registers */ - pushl %es - pushl %fs - pushl %gs + movl %gs:CPU_KERNEL_STACK,%ebx + xchgl %ebx,%esp /* switch to kernel stack */ - mov %ss,%dx /* switch to kernel data segment */ - mov %dx,%ds - mov %dx,%es - mov $ CPU_DATA_GS,%dx - mov %dx,%gs + sti + movl %gs:CPU_ACTIVE_THREAD,%ecx /* get current thread */ + movl ACT_TASK(%ecx),%ecx /* point to current task */ + addl $1,TASK_SYSCALLS_MACH(%ecx) /* increment call count */ - jmp syscall_entry_3 + CCALL1(mach_call_munger, %ebx) + /* + * always returns through thread_exception_return + */ -/* - * System call enters through a call gate. Flags are not saved - - * we must shuffle stack to look like trap save area. - * - * esp-> old eip - * old cs - * old esp - * old ss - * - * eax contains system call number. - * - * NB: below use of CPU_NUMBER assumes that macro will use correct - * correct segment register for any kernel data accesses. 
- */ -Entry(syscall) -syscall_entry: - pushf /* save flags as soon as possible */ -syscall_entry_2: - pushl %eax /* save system call number */ - pushl $0 /* clear trap number slot */ - - pusha /* save the general registers */ - pushl %ds /* and the segment registers */ - pushl %es - pushl %fs - pushl %gs - - mov %ss,%dx /* switch to kernel data segment */ - mov %dx,%ds - mov %dx,%es - mov $ CPU_DATA_GS,%dx - mov %dx,%gs + +Entry(lo_mdep_scall) + TIME_TRAP_UENTRY -/* - * Shuffle eflags,eip,cs into proper places - */ + movl %gs:CPU_KERNEL_STACK,%ebx + xchgl %ebx,%esp /* switch to kernel stack */ - movl R_EIP(%esp),%ebx /* eflags are in EIP slot */ - movl R_CS(%esp),%ecx /* eip is in CS slot */ - movl R_EFLAGS(%esp),%edx /* cs is in EFLAGS slot */ - movl %ecx,R_EIP(%esp) /* fix eip */ - movl %edx,R_CS(%esp) /* fix cs */ - movl %ebx,R_EFLAGS(%esp) /* fix eflags */ + sti -syscall_entry_3: -/* - * Check here for syscall from kernel-loaded task -- - * We didn't enter here "through" PCB (i.e., using ring 0 stack), - * so transfer the stack frame into the PCB explicitly, then - * start running on resulting "PCB stack". We have to set - * up a simulated "uesp" manually, since there's none in the - * frame. - */ - cmpl $0,%gs:CPU_ACTIVE_KLOADED - jz 0f - CAH(scstart) - movl %gs:CPU_ACTIVE_KLOADED,%ebx - movl %gs:CPU_KERNEL_STACK,%edx - xchgl %edx,%esp - FRAME_STACK_TO_PCB(%ebx,%edx) - CAH(scend) - TIME_TRAP_UENTRY - jmp 1f + CCALL1(machdep_syscall, %ebx) + /* + * always returns through thread_exception_return + */ + -0: +Entry(lo_diag_scall) TIME_TRAP_UENTRY - movl %gs:CPU_KERNEL_STACK,%ebx - /* get current kernel stack */ - xchgl %ebx,%esp /* switch stacks - %ebx points to */ - /* user registers. */ - /* user regs pointer already set */ - -/* - * Native system call. 
- * Register use on entry: - * eax contains syscall number - * ebx points to user regs - */ -1: - negl %eax /* get system call number */ - jl mach_call_range /* out of range if it was positive */ - - cmpl EXT(mach_trap_count),%eax /* check system call table bounds */ - jg mach_call_range /* error if out of range */ - shll $4,%eax /* manual indexing */ + movl %gs:CPU_KERNEL_STACK,%ebx // Get the address of the kernel stack + xchgl %ebx,%esp // Switch to it, saving the previous - movl EXT(mach_trap_table)+4(%eax),%edx - /* get procedure */ - cmpl $ EXT(kern_invalid),%edx /* if not "kern_invalid" */ - jne do_native_call /* go on with Mach syscall */ - shrl $4,%eax /* restore syscall number */ - jmp mach_call_range /* try it as a "server" syscall */ + CCALL1(diagCall, %ebx) // Call diagnostics + cli // Disable interruptions just in case they were enabled + popl %esp // Get back the original stack + + cmpl $0,%eax // What kind of return is this? + jne EXT(return_to_user) // Normal return, do not check asts... + + CCALL3(i386_exception, $EXC_SYSCALL, $0x6000, $1) + // pass what would be the diag syscall + // error return - cause an exception + /* no return */ + -/* - * Register use on entry: - * eax contains syscall number - * ebx contains user regs pointer - */ -do_native_call: - movl EXT(mach_trap_table)(%eax),%ecx - /* get number of arguments */ - jecxz mach_call_call /* skip argument copy if none */ - movl R_UESP(%ebx),%esi /* get user stack pointer */ - lea 4(%esi,%ecx,4),%esi /* skip user return address, */ - /* and point past last argument */ - movl %gs:CPU_ACTIVE_KLOADED,%edx - /* point to current thread */ - orl %edx,%edx /* if kernel-loaded, skip addr check */ - jz 0f /* else */ - mov %ds,%dx /* kernel data segment access */ - jmp 1f -0: - cmpl $(VM_MAX_ADDRESS),%esi /* in user space? 
*/ - ja mach_call_addr /* address error if not */ - movl $ USER_DS,%edx /* user data segment access */ -1: - mov %dx,%fs - movl %esp,%edx /* save kernel ESP for error recovery */ -2: - subl $4,%esi - RECOVERY_SECTION - RECOVER(mach_call_addr_push) - pushl %fs:(%esi) /* push argument on stack */ - loop 2b /* loop for all arguments */ -/* - * Register use on entry: - * eax contains syscall number - * ebx contains user regs pointer +/******************************************************************************************************* + * + * 64bit Tasks + * System call entries via syscall only: + * + * esp -> x86_saved_state64_t + * cr3 -> kernel directory + * esp -> low based stack + * gs -> CPU_DATA_GS + * cs -> KERNEL_CS + * ss/ds/es -> KERNEL_DS + * + * interrupts disabled + * direction flag cleared */ -mach_call_call: - CAH(call_call) +Entry(lo_syscall) + /* + * We can be here either for a mach, unix machdep or diag syscall, + * as indicated by the syscall class: + */ + movl R64_RAX(%esp), %eax /* syscall number/class */ + movl %eax, %ebx + andl $(SYSCALL_CLASS_MASK), %ebx /* syscall class */ + cmpl $(SYSCALL_CLASS_MACH<map */ - movl MAP_PMAP(%ecx),%ecx /* get map->pmap */ - cmpl EXT(kernel_pmap), %ecx - jz 1f - movl $ USER_DS,%cx /* user data segment access */ - mov %cx,%ds -1: - cmpl %esi,%eax - jb copyin_fail /* fail if wrap-around */ cld /* count up */ movl %edx,%ecx /* move by longwords first */ shrl $2,%ecx @@ -1798,66 +940,62 @@ ENTRY(copyin) rep movsb xorl %eax,%eax /* return 0 for success */ -copy_ret: - mov %ss,%di /* restore kernel data segment */ - mov %di,%ds +copyin_ret: + mov %ss,%cx /* restore kernel data and extended segments */ + mov %cx,%ds + mov %cx,%es popl %edi /* restore registers */ popl %esi ret /* and return */ copyin_fail: - movl $ EFAULT,%eax /* return error for failure */ - jmp copy_ret /* pop frame and return */ + movl $(EFAULT),%eax /* return error for failure */ + jmp copyin_ret /* pop frame and return */ + + /* - * Copy string 
from user address space. - * arg0: user address + * Copy string from user/kern address space. + * arg0: window offset or kernel address * arg1: kernel address * arg2: max byte count * arg3: actual byte count (OUT) */ -Entry(copyinstr) +Entry(copyinstr_kern) + mov %ds,%cx + jmp copyinstr_common + +Entry(copyinstr_user) + movl $(USER_WINDOW_SEL),%ecx /* user data segment access through kernel window */ + +copyinstr_common: + mov %cx,%fs + pushl %esi pushl %edi /* save registers */ - movl 8+S_ARG0,%esi /* get user start address */ - movl 8+S_ARG1,%edi /* get kernel destination address */ + movl 8+S_ARG0,%esi /* get source - window offset or kernel address */ + movl 8+S_ARG1,%edi /* get destination - kernel address */ movl 8+S_ARG2,%edx /* get count */ - lea 0(%esi,%edx),%eax /* get user end address + 1 */ - - movl %gs:CPU_ACTIVE_THREAD,%ecx /* get active thread */ - movl ACT_MAP(%ecx),%ecx /* get act->map */ - movl MAP_PMAP(%ecx),%ecx /* get map->pmap */ - cmpl EXT(kernel_pmap), %ecx - jne 0f - mov %ds,%cx /* kernel data segment access */ - jmp 1f -0: - movl $ USER_DS,%cx /* user data segment access */ -1: - mov %cx,%fs - xorl %eax,%eax - cmpl $0,%edx - je 4f + xorl %eax,%eax /* set to 0 here so that the high 24 bits */ + /* are 0 for the cmpl against 0 */ 2: RECOVERY_SECTION RECOVER(copystr_fail) /* copy bytes... */ - movb %fs:(%esi),%eax + movb %fs:(%esi),%al incl %esi testl %edi,%edi /* if kernel address is ... */ jz 3f /* not NULL */ - movb %eax,(%edi) /* copy the byte */ + movb %al,(%edi) /* copy the byte */ incl %edi 3: - decl %edx - je 5f /* Zero count.. error out */ - cmpl $0,%eax - jne 2b /* .. a NUL found? */ - jmp 4f /* return zero (%eax) */ -5: - movl $ ENAMETOOLONG,%eax /* String is too long.. */ + testl %eax,%eax /* did we just stuff the 0-byte? 
*/ + jz 4f /* yes, return 0 status already in %eax */ + decl %edx /* decrement #bytes left in buffer */ + jnz 2b /* buffer not full so copy in another byte */ + movl $(ENAMETOOLONG),%eax /* buffer full but no 0-byte: ENAMETOOLONG */ 4: movl 8+S_ARG3,%edi /* get OUT len ptr */ cmpl $0,%edi @@ -1870,271 +1008,66 @@ copystr_ret: ret /* and return */ copystr_fail: - movl $ EFAULT,%eax /* return error for failure */ - jmp copy_ret /* pop frame and return */ + movl $(EFAULT),%eax /* return error for failure */ + jmp copystr_ret /* pop frame and return */ + /* - * Copy to user address space. + * Copy to user/kern address space. * arg0: kernel address - * arg1: user address + * arg1: window offset or kernel address * arg2: byte count */ -Entry(copyoutmsg) -ENTRY(copyout) - pushl %esi - pushl %edi /* save registers */ - pushl %ebx - - movl 12+S_ARG0,%esi /* get kernel start address */ - movl 12+S_ARG1,%edi /* get user start address */ - movl 12+S_ARG2,%edx /* get count */ +ENTRY(copyoutphys_user) + movl $(USER_WINDOW_SEL),%ecx /* user data segment access through kernel window */ + mov %cx,%es - leal 0(%edi,%edx),%eax /* get user end address + 1 */ +ENTRY(copyoutphys_kern) + movl $(PHYS_WINDOW_SEL),%ecx /* physical access through kernel window */ + mov %cx,%ds + jmp copyout_common - movl %gs:CPU_ACTIVE_THREAD,%ecx /* get active thread */ - movl ACT_MAP(%ecx),%ecx /* get act->map */ - movl MAP_PMAP(%ecx),%ecx /* get map->pmap */ - cmpl EXT(kernel_pmap), %ecx - jne 0f - mov %ds,%cx /* else kernel data segment access */ - jmp 1f -0: - movl $ USER_DS,%cx -1: +ENTRY(copyout_user) + movl $(USER_WINDOW_SEL),%ecx /* user data segment access through kernel window */ mov %cx,%es -/* - * Check whether user address space is writable - * before writing to it - hardware is broken. - * - * Skip check if "user" address is really in - * kernel space (i.e., if it's in a kernel-loaded - * task). 
- * - * Register usage: - * esi/edi source/dest pointers for rep/mov - * ecx counter for rep/mov - * edx counts down from 3rd arg - * eax count of bytes for each (partial) page copy - * ebx shadows edi, used to adjust edx - */ - movl %edi,%ebx /* copy edi for syncing up */ -copyout_retry: - /* if restarting after a partial copy, put edx back in sync, */ - addl %ebx,%edx /* edx -= (edi - ebx); */ - subl %edi,%edx / - movl %edi,%ebx /* ebx = edi; */ +ENTRY(copyout_kern) + +copyout_common: + pushl %esi + pushl %edi /* save registers */ + + movl 8+S_ARG0,%esi /* get source - kernel address */ + movl 8+S_ARG1,%edi /* get destination - window offset or kernel address */ + movl 8+S_ARG2,%edx /* get count */ -/* - * Copy only what fits on the current destination page. - * Check for write-fault again on the next page. - */ - leal NBPG(%edi),%eax /* point to */ - andl $(-NBPG),%eax /* start of next page */ - subl %edi,%eax /* get number of bytes to that point */ - cmpl %edx,%eax /* bigger than count? 
*/ - jle 1f /* if so, */ - movl %edx,%eax /* use count */ -1: cld /* count up */ - movl %eax,%ecx /* move by longwords first */ + movl %edx,%ecx /* move by longwords first */ shrl $2,%ecx RECOVERY_SECTION RECOVER(copyout_fail) - RETRY_SECTION - RETRY(copyout_retry) rep movsl - movl %eax,%ecx /* now move remaining bytes */ + movl %edx,%ecx /* now move remaining bytes */ andl $3,%ecx RECOVERY_SECTION RECOVER(copyout_fail) - RETRY_SECTION - RETRY(copyout_retry) rep movsb /* move */ - movl %edi,%ebx /* copy edi for syncing up */ - subl %eax,%edx /* and decrement count */ - jg copyout_retry /* restart on next page if not done */ xorl %eax,%eax /* return 0 for success */ copyout_ret: - mov %ss,%di /* restore kernel segment */ - mov %di,%es + mov %ss,%cx /* restore kernel segment */ + mov %cx,%es + mov %cx,%ds - popl %ebx popl %edi /* restore registers */ popl %esi ret /* and return */ copyout_fail: - movl $ EFAULT,%eax /* return error for failure */ + movl $(EFAULT),%eax /* return error for failure */ jmp copyout_ret /* pop frame and return */ -/* - * FPU routines. - */ - -/* - * Initialize FPU. - */ -ENTRY(_fninit) - fninit - ret - -/* - * Read control word - */ -ENTRY(_fstcw) - pushl %eax /* get stack space */ - fstcw (%esp) - popl %eax - ret - -/* - * Set control word - */ -ENTRY(_fldcw) - fldcw 4(%esp) - ret - -/* - * Read status word - */ -ENTRY(_fnstsw) - xor %eax,%eax /* clear high 16 bits of eax */ - fnstsw %ax /* read FP status */ - ret - -/* - * Clear FPU exceptions - */ -ENTRY(_fnclex) - fnclex - ret - -/* - * Clear task-switched flag. - */ -ENTRY(_clts) - clts - ret - -/* - * Save complete FPU state. Save error for later. - */ -ENTRY(_fpsave) - movl 4(%esp),%eax /* get save area pointer */ - fnsave (%eax) /* save complete state, including */ - /* errors */ - ret - -/* - * Restore FPU state. 
- */ -ENTRY(_fprestore) - movl 4(%esp),%eax /* get save area pointer */ - frstor (%eax) /* restore complete state */ - ret - -/* - * Set cr3 - */ -ENTRY(set_cr3) - CPU_NUMBER(%eax) - orl 4(%esp), %eax - /* - * Don't set PDBR to a new value (hence invalidating the - * "paging cache") if the new value matches the current one. - */ - movl %cr3,%edx /* get current cr3 value */ - cmpl %eax,%edx - je 0f /* if two are equal, don't set */ - movl %eax,%cr3 /* load it (and flush cache) */ -0: - ret - -/* - * Read cr3 - */ -ENTRY(get_cr3) - movl %cr3,%eax - andl $(~0x7), %eax /* remove cpu number */ - ret - -/* - * Flush TLB - */ -ENTRY(flush_tlb) - movl %cr3,%eax /* flush tlb by reloading CR3 */ - movl %eax,%cr3 /* with itself */ - ret - -/* - * Read cr2 - */ -ENTRY(get_cr2) - movl %cr2,%eax - ret - -/* - * Read cr4 - */ -ENTRY(get_cr4) - .byte 0x0f,0x20,0xe0 /* movl %cr4, %eax */ - ret - -/* - * Write cr4 - */ -ENTRY(set_cr4) - movl 4(%esp), %eax - .byte 0x0f,0x22,0xe0 /* movl %eax, %cr4 */ - ret - -/* - * Read ldtr - */ -Entry(get_ldt) - xorl %eax,%eax - sldt %ax - ret - -/* - * Set ldtr - */ -Entry(set_ldt) - lldt 4(%esp) - ret - -/* - * Read task register. - */ -ENTRY(get_tr) - xorl %eax,%eax - str %ax - ret - -/* - * Set task register. Also clears busy bit of task descriptor. - */ -ENTRY(set_tr) - movl S_ARG0,%eax /* get task segment number */ - subl $8,%esp /* push space for SGDT */ - sgdt 2(%esp) /* store GDT limit and base (linear) */ - movl 4(%esp),%edx /* address GDT */ - movb $(K_TSS),5(%edx,%eax) /* fix access byte in task descriptor */ - ltr %ax /* load task register */ - addl $8,%esp /* clear stack */ - ret /* and return */ - -/* - * Set task-switched flag. - */ -ENTRY(_setts) - movl %cr0,%eax /* get cr0 */ - orl $(CR0_TS),%eax /* or in TS bit */ - movl %eax,%cr0 /* set cr0 */ - ret /* * io register must not be used on slaves (no AT bus) @@ -2393,79 +1326,6 @@ ENTRY(insl) POP_FRAME ret - -/* - * int inst_fetch(int eip, int cs); - * - * Fetch instruction byte. 
Return -1 if invalid address. - */ - .globl EXT(inst_fetch) -LEXT(inst_fetch) - movl S_ARG1, %eax /* get segment */ - movw %ax,%fs /* into FS */ - movl S_ARG0, %eax /* get offset */ - RETRY_SECTION - RETRY(EXT(inst_fetch)) /* re-load FS on retry */ - RECOVERY_SECTION - RECOVER(EXT(inst_fetch_fault)) - movzbl %fs:(%eax),%eax /* load instruction byte */ - ret - -LEXT(inst_fetch_fault) - movl $-1,%eax /* return -1 if error */ - ret - - -#if MACH_KDP -/* - * kdp_copy_kmem(char *src, char *dst, int count) - * - * Similar to copyin except that both addresses are kernel addresses. - */ - -ENTRY(kdp_copy_kmem) - pushl %esi - pushl %edi /* save registers */ - - movl 8+S_ARG0,%esi /* get kernel start address */ - movl 8+S_ARG1,%edi /* get kernel destination address */ - - movl 8+S_ARG2,%edx /* get count */ - - lea 0(%esi,%edx),%eax /* get kernel end address + 1 */ - - cmpl %esi,%eax - jb kdp_vm_read_fail /* fail if wrap-around */ - cld /* count up */ - movl %edx,%ecx /* move by longwords first */ - shrl $2,%ecx - RECOVERY_SECTION - RECOVER(kdp_vm_read_fail) - rep - movsl /* move longwords */ - movl %edx,%ecx /* now move remaining bytes */ - andl $3,%ecx - RECOVERY_SECTION - RECOVER(kdp_vm_read_fail) - rep - movsb -kdp_vm_read_done: - movl 8+S_ARG2,%edx /* get count */ - subl %ecx,%edx /* Return number of bytes transfered */ - movl %edx,%eax - - popl %edi /* restore registers */ - popl %esi - ret /* and return */ - -kdp_vm_read_fail: - xorl %eax,%eax /* didn't copy a thing. */ - - popl %edi - popl %esi - ret -#endif - /* * int rdmsr_carefully(uint32_t msr, uint32_t *lo, uint32_t *hi) */ @@ -2486,12 +1346,10 @@ rdmsr_fail: ret /* - * Done with recovery and retry tables. + * Done with recovery table. 
*/ RECOVERY_SECTION RECOVER_TABLE_END - RETRY_SECTION - RETRY_TABLE_END @@ -2541,13 +1399,13 @@ ENTRY(dr3) movzbl B_ARG1, %eax andb $3, %al - addb $0x10, %ecx + addb $0x10, %cl shll %cl, %eax orl %eax, %edx movzbl B_ARG2, %eax andb $3, %al - addb $0x2, %ecx + addb $0x2, %cl shll %cl, %eax orl %eax, %edx @@ -2566,6 +1424,7 @@ dr_msk: ENTRY(dr_addr) .long 0,0,0,0 .long 0,0,0,0 + .text ENTRY(get_cr0) @@ -2659,34 +1518,6 @@ ENTRY(get_pc) movl 4(%ebp),%eax ret -#if ETAP - -ENTRY(etap_get_pc) - movl 4(%ebp), %eax /* fetch pc of caller */ - ret - -ENTRY(tvals_to_etap) - movl S_ARG0, %eax - movl $1000000000, %ecx - mull %ecx - addl S_ARG1, %eax - adc $0, %edx - ret - -/* etap_time_t - * etap_time_sub(etap_time_t stop, etap_time_t start) - * - * 64bit subtract, returns stop - start - */ -ENTRY(etap_time_sub) - movl S_ARG0, %eax /* stop.low */ - movl S_ARG1, %edx /* stop.hi */ - subl S_ARG2, %eax /* stop.lo - start.lo */ - sbbl S_ARG3, %edx /* stop.hi - start.hi */ - ret - -#endif /* ETAP */ - ENTRY(minsecurity) pushl %ebp movl %esp,%ebp @@ -2770,174 +1601,33 @@ ENTRY(mul_scale) POP_FRAME ret -#ifdef MACH_BSD -/* - * BSD System call entry point.. 
- */ - -Entry(trap_unix_syscall) -trap_unix_addr: - pushf /* save flags as soon as possible */ -trap_unix_2: - pushl %eax /* save system call number */ - pushl $0 /* clear trap number slot */ - - pusha /* save the general registers */ - pushl %ds /* and the segment registers */ - pushl %es - pushl %fs - pushl %gs - - mov %ss,%dx /* switch to kernel data segment */ - mov %dx,%ds - mov %dx,%es - mov $ CPU_DATA_GS,%dx - mov %dx,%gs - -/* - * Shuffle eflags,eip,cs into proper places - */ - - movl R_EIP(%esp),%ebx /* eflags are in EIP slot */ - movl R_CS(%esp),%ecx /* eip is in CS slot */ - movl R_EFLAGS(%esp),%edx /* cs is in EFLAGS slot */ - movl %ecx,R_EIP(%esp) /* fix eip */ - movl %edx,R_CS(%esp) /* fix cs */ - movl %ebx,R_EFLAGS(%esp) /* fix eflags */ - - TIME_TRAP_UENTRY - - negl %eax /* get system call number */ - shll $4,%eax /* manual indexing */ - - movl %gs:CPU_KERNEL_STACK,%ebx - /* get current kernel stack */ - xchgl %ebx,%esp /* switch stacks - %ebx points to */ - /* user registers. */ - -/* - * Register use on entry: - * eax contains syscall number - * ebx contains user regs pointer - */ - CAH(call_call) - pushl %ebx /* Push the regs set onto stack */ - call EXT(unix_syscall) - popl %ebx - movl %esp,%ecx /* get kernel stack */ - or $(KERNEL_STACK_SIZE-1),%ecx - movl -3-IKS_SIZE(%ecx),%esp /* switch back to PCB stack */ - movl %eax,R_EAX(%esp) /* save return value */ - jmp EXT(return_from_trap) /* return to user */ + /* - * Entry point for machdep system calls.. + * Double-fault exception handler task. The last gasp... 
*/ +Entry(df_task_start) + CCALL1(panic_double_fault, $(T_DOUBLE_FAULT)) + hlt -Entry(trap_machdep_syscall) - pushf /* save flags as soon as possible */ - pushl %eax /* save system call number */ - pushl $0 /* clear trap number slot */ - - pusha /* save the general registers */ - pushl %ds /* and the segment registers */ - pushl %es - pushl %fs - pushl %gs - - mov %ss,%dx /* switch to kernel data segment */ - mov %dx,%ds - mov %dx,%es - mov $ CPU_DATA_GS,%dx - mov %dx,%gs /* - * Shuffle eflags,eip,cs into proper places + * machine-check handler task. The last gasp... */ - - movl R_EIP(%esp),%ebx /* eflags are in EIP slot */ - movl R_CS(%esp),%ecx /* eip is in CS slot */ - movl R_EFLAGS(%esp),%edx /* cs is in EFLAGS slot */ - movl %ecx,R_EIP(%esp) /* fix eip */ - movl %edx,R_CS(%esp) /* fix cs */ - movl %ebx,R_EFLAGS(%esp) /* fix eflags */ - - TIME_TRAP_UENTRY - - negl %eax /* get system call number */ - shll $4,%eax /* manual indexing */ - - movl %gs:CPU_KERNEL_STACK,%ebx - /* get current kernel stack */ - xchgl %ebx,%esp /* switch stacks - %ebx points to */ - /* user registers. 
*/ - -/* - * Register use on entry: - * eax contains syscall number - * ebx contains user regs pointer - */ - CAH(call_call) - pushl %ebx - call EXT(machdep_syscall) - popl %ebx - movl %esp,%ecx /* get kernel stack */ - or $(KERNEL_STACK_SIZE-1),%ecx - movl -3-IKS_SIZE(%ecx),%esp /* switch back to PCB stack */ - movl %eax,R_EAX(%esp) /* save return value */ - jmp EXT(return_from_trap) /* return to user */ - -Entry(trap_mach25_syscall) - pushf /* save flags as soon as possible */ - pushl %eax /* save system call number */ - pushl $0 /* clear trap number slot */ - - pusha /* save the general registers */ - pushl %ds /* and the segment registers */ - pushl %es - pushl %fs - pushl %gs - - mov %ss,%dx /* switch to kernel data segment */ - mov %dx,%ds - mov %dx,%es - mov $ CPU_DATA_GS,%dx - mov %dx,%gs +Entry(mc_task_start) + CCALL1(panic_machine_check, $(T_MACHINE_CHECK)) + hlt /* - * Shuffle eflags,eip,cs into proper places + * Compatibility mode's last gasp... */ +Entry(lo_df64) + movl %esp, %eax + CCALL1(panic_double_fault64, %eax) + hlt - movl R_EIP(%esp),%ebx /* eflags are in EIP slot */ - movl R_CS(%esp),%ecx /* eip is in CS slot */ - movl R_EFLAGS(%esp),%edx /* cs is in EFLAGS slot */ - movl %ecx,R_EIP(%esp) /* fix eip */ - movl %edx,R_CS(%esp) /* fix cs */ - movl %ebx,R_EFLAGS(%esp) /* fix eflags */ +Entry(lo_mc64) + movl %esp, %eax + CCALL1(panic_machine_check64, %eax) + hlt - TIME_TRAP_UENTRY - - negl %eax /* get system call number */ - shll $4,%eax /* manual indexing */ - - movl %gs:CPU_KERNEL_STACK,%ebx - /* get current kernel stack */ - xchgl %ebx,%esp /* switch stacks - %ebx points to */ - /* user registers. 
*/ - -/* - * Register use on entry: - * eax contains syscall number - * ebx contains user regs pointer - */ - CAH(call_call) - pushl %ebx - call EXT(mach25_syscall) - popl %ebx - movl %esp,%ecx /* get kernel stack */ - or $(KERNEL_STACK_SIZE-1),%ecx - movl -3-IKS_SIZE(%ecx),%esp /* switch back to PCB stack */ - movl %eax,R_EAX(%esp) /* save return value */ - jmp EXT(return_from_trap) /* return to user */ - -#endif diff --git a/osfmk/i386/loose_ends.c b/osfmk/i386/loose_ends.c index 4ce9fd757..ada20c4ac 100644 --- a/osfmk/i386/loose_ends.c +++ b/osfmk/i386/loose_ends.c @@ -57,6 +57,7 @@ #include #include #include +#include #include #include #include @@ -67,20 +68,26 @@ #include #include +#include +#include + +#if 0 + +#undef KERNEL_DEBUG +#define KERNEL_DEBUG KERNEL_DEBUG_CONSTANT +#define KDEBUG 1 + +#endif + /* XXX - should be gone from here */ extern void invalidate_icache64(addr64_t addr, unsigned cnt, int phys); extern void flush_dcache64(addr64_t addr, unsigned count, int phys); extern boolean_t phys_page_exists(ppnum_t); -extern pt_entry_t *pmap_mapgetpte(vm_map_t, vm_offset_t); extern void bcopy_no_overwrite(const char *from, char *to,vm_size_t bytes); extern void pmap_set_reference(ppnum_t pn); extern void mapping_set_mod(ppnum_t pa); extern void mapping_set_ref(ppnum_t pn); -extern void switch_to_serial_console(void); -extern kern_return_t copyp2p(vm_offset_t source, - vm_offset_t dest, - unsigned int size, - unsigned int flush_action); + extern void fillPage(ppnum_t pa, unsigned int fill); extern void ovbcopy(const char *from, char *to, @@ -97,71 +104,28 @@ bzero_phys( addr64_t src64, vm_size_t bytes) { - vm_offset_t src = low32(src64); - pt_entry_t save2; - mp_disable_preemption(); - if (*(pt_entry_t *) CM2) - panic("bzero_phys: CMAP busy"); + mapwindow_t *map; + pt_entry_t save; - *(pt_entry_t *) CM2 = INTEL_PTE_VALID | INTEL_PTE_RW | (src & PG_FRAME) | - INTEL_PTE_REF | INTEL_PTE_MOD; - save2=*(pt_entry_t *)CM2; - invlpg((u_int)CA2); - - bzero((void 
*)((unsigned int)CA2 | (src & INTEL_OFFMASK)), bytes); - if (save2 != *(pt_entry_t *)CM2) panic("bzero_phys CMAP changed"); - *(pt_entry_t *) CM2 = 0; - mp_enable_preemption(); -} + mp_disable_preemption(); + map = pmap_get_mapwindow((pt_entry_t)(INTEL_PTE_VALID | INTEL_PTE_RW | ((pmap_paddr_t)src64 & PG_FRAME) | INTEL_PTE_REF | INTEL_PTE_MOD)); + if (map == 0) { + panic("bzero_phys: CMAP busy"); + } + save = *map->prv_CMAP; -/* - * copy 'size' bytes from physical to physical address - * the caller must validate the physical ranges - * - * if flush_action == 0, no cache flush necessary - * if flush_action == 1, flush the source - * if flush_action == 2, flush the dest - * if flush_action == 3, flush both source and dest - */ + invlpg((uintptr_t)map->prv_CADDR); -kern_return_t -copyp2p(vm_offset_t source, - vm_offset_t dest, - unsigned int size, - unsigned int flush_action) -{ - - switch(flush_action) { - case 1: - flush_dcache(source, size, 1); - break; - case 2: - flush_dcache(dest, size, 1); - break; - case 3: - flush_dcache(source, size, 1); - flush_dcache(dest, size, 1); - break; + bzero((void *)((uintptr_t)map->prv_CADDR | ((uint32_t)src64 & INTEL_OFFMASK)), bytes); - } - bcopy_phys((addr64_t)source, (addr64_t)dest, (vm_size_t)size); /* Do a physical copy */ - - switch(flush_action) { - case 1: - flush_dcache(source, size, 1); - break; - case 2: - flush_dcache(dest, size, 1); - break; - case 3: - flush_dcache(source, size, 1); - flush_dcache(dest, size, 1); - break; + if (save != *map->prv_CMAP) + panic("bzero_phys: CMAP changed"); + *map->prv_CMAP = 0; - } - return KERN_SUCCESS; + mp_enable_preemption(); } + /* * bcopy_phys - like bcopy but copies from/to physical addresses. 
*/ @@ -172,30 +136,38 @@ bcopy_phys( addr64_t dst64, vm_size_t bytes) { - vm_offset_t src = low32(src64); - vm_offset_t dst = low32(dst64); - pt_entry_t save1,save2; - /* ensure we stay within a page */ - if ( (((src & (NBPG-1)) + bytes) > NBPG) || - (((dst & (NBPG-1)) + bytes) > NBPG) ) panic("bcopy_phys"); - mp_disable_preemption(); - if (*(pt_entry_t *) CM1 || *(pt_entry_t *) CM2) - panic("bcopy_phys: CMAP busy"); - - *(pt_entry_t *) CM1 = INTEL_PTE_VALID | (src & PG_FRAME) | INTEL_PTE_REF; - *(pt_entry_t *) CM2 = INTEL_PTE_VALID | INTEL_PTE_RW | (dst & PG_FRAME) | - INTEL_PTE_REF | INTEL_PTE_MOD; - save1 = *(pt_entry_t *)CM1;save2 = *(pt_entry_t *)CM2; - invlpg((u_int)CA1); - invlpg((u_int)CA2); - - bcopy((void *) ((uintptr_t)CA1 | (src & INTEL_OFFMASK)), - (void *) ((uintptr_t)CA2 | (dst & INTEL_OFFMASK)), bytes); - if ( (save1 != *(pt_entry_t *)CM1) || (save2 != *(pt_entry_t *)CM2)) panic("bcopy_phys CMAP changed"); - *(pt_entry_t *) CM1 = 0; - *(pt_entry_t *) CM2 = 0; - mp_enable_preemption(); + mapwindow_t *src_map, *dst_map; + pt_entry_t save1, save2; + + /* ensure we stay within a page */ + if ( ((((uint32_t)src64 & (NBPG-1)) + bytes) > NBPG) || ((((uint32_t)dst64 & (NBPG-1)) + bytes) > NBPG) ) { + panic("bcopy_phys alignment"); + } + mp_disable_preemption(); + + src_map = pmap_get_mapwindow((pt_entry_t)(INTEL_PTE_VALID | ((pmap_paddr_t)src64 & PG_FRAME) | INTEL_PTE_REF)); + dst_map = pmap_get_mapwindow((pt_entry_t)(INTEL_PTE_VALID | INTEL_PTE_RW | ((pmap_paddr_t)dst64 & PG_FRAME) | + INTEL_PTE_REF | INTEL_PTE_MOD)); + + if (src_map == 0 || dst_map == 0) { + panic("bcopy_phys: CMAP busy"); + } + save1 = *src_map->prv_CMAP; + save2 = *dst_map->prv_CMAP; + + invlpg((uintptr_t)src_map->prv_CADDR); + invlpg((uintptr_t)dst_map->prv_CADDR); + + bcopy((void *) ((uintptr_t)src_map->prv_CADDR | ((uint32_t)src64 & INTEL_OFFMASK)), + (void *) ((uintptr_t)dst_map->prv_CADDR | ((uint32_t)dst64 & INTEL_OFFMASK)), bytes); + + if ( (save1 != *src_map->prv_CMAP) || (save2 
!= *dst_map->prv_CMAP)) + panic("bcopy_phys CMAP changed"); + *src_map->prv_CMAP = 0; + *dst_map->prv_CMAP = 0; + + mp_enable_preemption(); } /* @@ -230,217 +202,242 @@ ovbcopy( static unsigned int -ml_phys_read_data( vm_offset_t paddr, int size ) +ml_phys_read_data(pmap_paddr_t paddr, int size ) { - unsigned int result; - pt_entry_t save; - mp_disable_preemption(); - if (*(pt_entry_t *) CM3) - panic("ml_phys_read_data: CMAP busy"); + mapwindow_t *map; + unsigned int result; + pt_entry_t save; - *(pt_entry_t *) CM3 = INTEL_PTE_VALID | (paddr & PG_FRAME) | INTEL_PTE_REF; - save = *(pt_entry_t *)CM3; - invlpg((u_int)CA3); + mp_disable_preemption(); + map = pmap_get_mapwindow((pt_entry_t)(INTEL_PTE_VALID | (paddr & PG_FRAME) | INTEL_PTE_REF)); + if (map == 0) { + panic("ml_phys_read_data: CMAP busy"); + } + save = *map->prv_CMAP; + invlpg((uintptr_t)map->prv_CADDR); switch (size) { unsigned char s1; unsigned short s2; case 1: - s1 = *(unsigned char *)((unsigned int)CA3 | (paddr & INTEL_OFFMASK)); + s1 = *(unsigned char *)((uintptr_t)map->prv_CADDR | ((uint32_t)paddr & INTEL_OFFMASK)); result = s1; break; case 2: - s2 = *(unsigned short *)((unsigned int)CA3 | (paddr & INTEL_OFFMASK)); + s2 = *(unsigned short *)((uintptr_t)map->prv_CADDR | ((uint32_t)paddr & INTEL_OFFMASK)); result = s2; break; case 4: default: - result = *(unsigned int *)((unsigned int)CA3 | (paddr & INTEL_OFFMASK)); + result = *(unsigned int *)((uintptr_t)map->prv_CADDR | ((uint32_t)paddr & INTEL_OFFMASK)); break; } - if (save != *(pt_entry_t *)CM3) panic("ml_phys_read_data CMAP changed"); - *(pt_entry_t *) CM3 = 0; + if (save != *map->prv_CMAP) + panic("ml_phys_read_data CMAP changed"); + *map->prv_CMAP = 0; mp_enable_preemption(); + return result; } static unsigned long long -ml_phys_read_long_long( vm_offset_t paddr ) +ml_phys_read_long_long(pmap_paddr_t paddr ) { - unsigned long long result; - pt_entry_t save; - mp_disable_preemption(); - if (*(pt_entry_t *) CM3) - panic("ml_phys_read_data: CMAP 
busy"); + mapwindow_t *map; + unsigned long long result; + pt_entry_t save; - *(pt_entry_t *) CM3 = INTEL_PTE_VALID | (paddr & PG_FRAME) | INTEL_PTE_REF; - save = *(pt_entry_t *)CM3; - invlpg((u_int)CA3); + mp_disable_preemption(); + map = pmap_get_mapwindow((pt_entry_t)(INTEL_PTE_VALID | (paddr & PG_FRAME) | INTEL_PTE_REF)); - result = *(unsigned long long *)((unsigned int)CA3 | (paddr & INTEL_OFFMASK)); + if (map == 0) { + panic("ml_phys_read_long_long: CMAP busy"); + } + + save = *map->prv_CMAP; + invlpg((uintptr_t)map->prv_CADDR); - if (save != *(pt_entry_t *)CM3) panic("ml_phys_read_data CMAP changed"); - *(pt_entry_t *) CM3 = 0; + result = *(unsigned long long *)((uintptr_t)map->prv_CADDR | ((uint32_t)paddr & INTEL_OFFMASK)); + + if (save != *map->prv_CMAP) + panic("ml_phys_read_long_long CMAP changed"); + *map->prv_CMAP = 0; mp_enable_preemption(); - return result; + + return result; } -unsigned int ml_phys_read( vm_offset_t paddr) + + +unsigned int ml_phys_read(vm_offset_t paddr) { - return ml_phys_read_data(paddr, 4); + return ml_phys_read_data((pmap_paddr_t)paddr, 4); } unsigned int ml_phys_read_word(vm_offset_t paddr) { - return ml_phys_read_data(paddr, 4); + + return ml_phys_read_data((pmap_paddr_t)paddr, 4); } unsigned int ml_phys_read_64(addr64_t paddr64) { - return ml_phys_read_data(low32(paddr64), 4); + return ml_phys_read_data((pmap_paddr_t)paddr64, 4); } unsigned int ml_phys_read_word_64(addr64_t paddr64) { - return ml_phys_read_data(low32(paddr64), 4); + return ml_phys_read_data((pmap_paddr_t)paddr64, 4); } unsigned int ml_phys_read_half(vm_offset_t paddr) { - return ml_phys_read_data(paddr, 2); + return ml_phys_read_data((pmap_paddr_t)paddr, 2); } unsigned int ml_phys_read_half_64(addr64_t paddr64) { - return ml_phys_read_data(low32(paddr64), 2); + return ml_phys_read_data((pmap_paddr_t)paddr64, 2); } unsigned int ml_phys_read_byte(vm_offset_t paddr) { - return ml_phys_read_data(paddr, 1); + return ml_phys_read_data((pmap_paddr_t)paddr, 1); } 
unsigned int ml_phys_read_byte_64(addr64_t paddr64) { - return ml_phys_read_data(low32(paddr64), 1); + return ml_phys_read_data((pmap_paddr_t)paddr64, 1); } unsigned long long ml_phys_read_double(vm_offset_t paddr) { - return ml_phys_read_long_long(paddr); + return ml_phys_read_long_long((pmap_paddr_t)paddr); } -unsigned long long ml_phys_read_double_64(addr64_t paddr) +unsigned long long ml_phys_read_double_64(addr64_t paddr64) { - return ml_phys_read_long_long(low32(paddr)); + return ml_phys_read_long_long((pmap_paddr_t)paddr64); } + /* * Write data to a physical address. Memory should not be cache inhibited. */ static void -ml_phys_write_data( vm_offset_t paddr, unsigned long data, int size ) +ml_phys_write_data(pmap_paddr_t paddr, unsigned long data, int size) { - pt_entry_t save; - mp_disable_preemption(); - if (*(pt_entry_t *) CM3) + mapwindow_t *map; + pt_entry_t save; + + mp_disable_preemption(); + map = pmap_get_mapwindow((pt_entry_t)(INTEL_PTE_VALID | INTEL_PTE_RW | (paddr & PG_FRAME) | + INTEL_PTE_REF | INTEL_PTE_MOD)); + + if (map == 0) { panic("ml_phys_write_data: CMAP busy"); + } - *(pt_entry_t *) CM3 = INTEL_PTE_VALID | INTEL_PTE_RW | (paddr & PG_FRAME) | - INTEL_PTE_REF | INTEL_PTE_MOD; - save = *(pt_entry_t *)CM3; - invlpg((u_int)CA3); + save = *map->prv_CMAP; + invlpg((uintptr_t)map->prv_CADDR); switch (size) { case 1: - *(unsigned char *)((unsigned int)CA3 | (paddr & INTEL_OFFMASK)) = (unsigned char)data; + *(unsigned char *)((uintptr_t)map->prv_CADDR | ((uint32_t)paddr & INTEL_OFFMASK)) = (unsigned char)data; break; case 2: - *(unsigned short *)((unsigned int)CA3 | (paddr & INTEL_OFFMASK)) = (unsigned short)data; + *(unsigned short *)((uintptr_t)map->prv_CADDR | ((uint32_t)paddr & INTEL_OFFMASK)) = (unsigned short)data; break; case 4: default: - *(unsigned int *)((unsigned int)CA3 | (paddr & INTEL_OFFMASK)) = data; + *(unsigned int *)((uintptr_t)map->prv_CADDR | ((uint32_t)paddr & INTEL_OFFMASK)) = data; break; } - if (save != *(pt_entry_t 
*)CM3) panic("ml_phys_write_data CMAP changed"); - *(pt_entry_t *) CM3 = 0; + if (save != *map->prv_CMAP) + panic("ml_phys_write_data CMAP changed"); + *map->prv_CMAP = 0; + mp_enable_preemption(); } static void -ml_phys_write_long_long( vm_offset_t paddr, unsigned long long data ) +ml_phys_write_long_long(pmap_paddr_t paddr, unsigned long long data) { - pt_entry_t save; - mp_disable_preemption(); - if (*(pt_entry_t *) CM3) + mapwindow_t *map; + pt_entry_t save; + + mp_disable_preemption(); + map = pmap_get_mapwindow((pt_entry_t)(INTEL_PTE_VALID | INTEL_PTE_RW | (paddr & PG_FRAME) | + INTEL_PTE_REF | INTEL_PTE_MOD)); + if (map == 0) { panic("ml_phys_write_data: CMAP busy"); + } - *(pt_entry_t *) CM3 = INTEL_PTE_VALID | INTEL_PTE_RW | (paddr & PG_FRAME) | - INTEL_PTE_REF | INTEL_PTE_MOD; - save = *(pt_entry_t *)CM3; - invlpg((u_int)CA3); + save = *map->prv_CMAP; + invlpg((uintptr_t)map->prv_CADDR); - *(unsigned long long *)((unsigned int)CA3 | (paddr & INTEL_OFFMASK)) = data; + *(unsigned long long *)((uintptr_t)map->prv_CADDR | ((uint32_t)paddr & INTEL_OFFMASK)) = data; - if (save != *(pt_entry_t *)CM3) panic("ml_phys_write_data CMAP changed"); - *(pt_entry_t *) CM3 = 0; + if (save != *map->prv_CMAP) + panic("ml_phys_write_data CMAP changed"); + *map->prv_CMAP = 0; mp_enable_preemption(); } + + void ml_phys_write_byte(vm_offset_t paddr, unsigned int data) { - ml_phys_write_data(paddr, data, 1); + ml_phys_write_data((pmap_paddr_t)paddr, data, 1); } -void ml_phys_write_byte_64(addr64_t paddr, unsigned int data) +void ml_phys_write_byte_64(addr64_t paddr64, unsigned int data) { - ml_phys_write_data(low32(paddr), data, 1); + ml_phys_write_data((pmap_paddr_t)paddr64, data, 1); } void ml_phys_write_half(vm_offset_t paddr, unsigned int data) { - ml_phys_write_data(paddr, data, 2); + ml_phys_write_data((pmap_paddr_t)paddr, data, 2); } -void ml_phys_write_half_64(addr64_t paddr, unsigned int data) +void ml_phys_write_half_64(addr64_t paddr64, unsigned int data) { - 
ml_phys_write_data(low32(paddr), data, 2); + ml_phys_write_data((pmap_paddr_t)paddr64, data, 2); } void ml_phys_write(vm_offset_t paddr, unsigned int data) { - ml_phys_write_data(paddr, data, 4); + ml_phys_write_data((pmap_paddr_t)paddr, data, 4); } -void ml_phys_write_64(addr64_t paddr, unsigned int data) +void ml_phys_write_64(addr64_t paddr64, unsigned int data) { - ml_phys_write_data(low32(paddr), data, 4); + ml_phys_write_data((pmap_paddr_t)paddr64, data, 4); } void ml_phys_write_word(vm_offset_t paddr, unsigned int data) { - ml_phys_write_data(paddr, data, 4); + ml_phys_write_data((pmap_paddr_t)paddr, data, 4); } -void ml_phys_write_word_64(addr64_t paddr, unsigned int data) +void ml_phys_write_word_64(addr64_t paddr64, unsigned int data) { - ml_phys_write_data(low32(paddr), data, 4); + ml_phys_write_data((pmap_paddr_t)paddr64, data, 4); } - void ml_phys_write_double(vm_offset_t paddr, unsigned long long data) { - ml_phys_write_long_long(paddr, data); + ml_phys_write_long_long((pmap_paddr_t)paddr, data); } -void ml_phys_write_double_64(addr64_t paddr, unsigned long long data) +void ml_phys_write_double_64(addr64_t paddr64, unsigned long long data) { - ml_phys_write_long_long(low32(paddr), data); + ml_phys_write_long_long((pmap_paddr_t)paddr64, data); } @@ -456,8 +453,9 @@ void ml_phys_write_double_64(addr64_t paddr, unsigned long long data) boolean_t ml_probe_read(vm_offset_t paddr, unsigned int *val) { - *val = ml_phys_read(paddr); - return TRUE; + *val = ml_phys_read((pmap_paddr_t)paddr); + + return TRUE; } /* @@ -467,10 +465,11 @@ ml_probe_read(vm_offset_t paddr, unsigned int *val) * We assume that machine checks are enabled both in MSR and HIDs */ boolean_t -ml_probe_read_64(addr64_t paddr, unsigned int *val) +ml_probe_read_64(addr64_t paddr64, unsigned int *val) { - *val = ml_phys_read_64(paddr); - return TRUE; + *val = ml_phys_read_64((pmap_paddr_t)paddr64); + + return TRUE; } @@ -526,8 +525,6 @@ strlen( return string - 1 - ret; } -#include - uint32_t 
hw_atomic_add( uint32_t *dest, @@ -624,23 +621,26 @@ void machine_callstack( void fillPage(ppnum_t pa, unsigned int fill) { - pmap_paddr_t src; - int i; - int cnt = PAGE_SIZE/sizeof(unsigned int); - unsigned int *addr; - mp_disable_preemption(); - if (*(pt_entry_t *) CM2) + mapwindow_t *map; + pmap_paddr_t src; + int i; + int cnt = PAGE_SIZE/sizeof(unsigned int); + unsigned int *addr; + + mp_disable_preemption(); + src = i386_ptob(pa); + map = pmap_get_mapwindow((pt_entry_t)(INTEL_PTE_VALID | INTEL_PTE_RW | (src & PG_FRAME) | + INTEL_PTE_REF | INTEL_PTE_MOD)); + if (map == 0) { panic("fillPage: CMAP busy"); - src = (pmap_paddr_t)i386_ptob(pa); - *(pt_entry_t *) CM2 = INTEL_PTE_VALID | INTEL_PTE_RW | (src & PG_FRAME) | - INTEL_PTE_REF | INTEL_PTE_MOD; - invlpg((u_int)CA2); + } + invlpg((uintptr_t)map->prv_CADDR); - for (i = 0, addr = (unsigned int *)CA2; i < cnt ; i++ ) - *addr++ = fill; + for (i = 0, addr = (unsigned int *)map->prv_CADDR; i < cnt ; i++ ) + *addr++ = fill; - *(pt_entry_t *) CM2 = 0; - mp_enable_preemption(); + *map->prv_CMAP = 0; + mp_enable_preemption(); } static inline void __sfence(void) @@ -657,26 +657,30 @@ static inline void __wbinvd(void) } static inline void __clflush(void *ptr) { - __asm__ volatile(".byte 0x0F; .byte 0xae; .byte 0x38" : : "a" (ptr)); + __asm__ volatile("clflush (%0)" : : "r" (ptr)); } void dcache_incoherent_io_store64(addr64_t pa, unsigned int count) { - if (cpuid_features() & CPUID_FEATURE_CLFSH) - { + mapwindow_t *map; uint32_t linesize = cpuid_info()->cache_linesize; addr64_t addr; uint32_t offset, chunk; boolean_t istate; + __mfence(); + istate = ml_set_interrupts_enabled(FALSE); - if (*(pt_entry_t *) CM2) + offset = pa & (linesize - 1); + addr = pa - offset; + + map = pmap_get_mapwindow((pt_entry_t)(i386_ptob(atop_64(addr)) | INTEL_PTE_VALID)); + if (map == 0) { panic("cache_flush_page_phys: CMAP busy"); + } - offset = pa & (linesize - 1); count += offset; - addr = pa - offset; offset = addr & ((addr64_t) (page_size - 
1)); chunk = page_size - offset; @@ -685,11 +689,11 @@ void dcache_incoherent_io_store64(addr64_t pa, unsigned int count) if (chunk > count) chunk = count; - *(pt_entry_t *) CM2 = i386_ptob(atop_64(addr)) | INTEL_PTE_VALID; - invlpg((u_int)CA2); + *map->prv_CMAP = (pt_entry_t)(i386_ptob(atop_64(addr)) | INTEL_PTE_VALID); + invlpg((uintptr_t)map->prv_CADDR); for (; offset < chunk; offset += linesize) - __clflush((void *)(((u_int)CA2) + offset)); + __clflush((void *)(((uintptr_t)map->prv_CADDR) + offset)); count -= chunk; addr += chunk; @@ -698,13 +702,11 @@ void dcache_incoherent_io_store64(addr64_t pa, unsigned int count) } while (count); - *(pt_entry_t *) CM2 = 0; + *map->prv_CMAP = 0; (void) ml_set_interrupts_enabled(istate); - } - else - __wbinvd(); - __sfence(); + + __mfence(); } void dcache_incoherent_io_flush64(addr64_t pa, unsigned int count) @@ -726,174 +728,586 @@ invalidate_icache64(__unused addr64_t addr, { } -kern_return_t copypv(addr64_t src64, - addr64_t snk64, - unsigned int size, - int which) + +addr64_t vm_last_addr; + +void +mapping_set_mod(ppnum_t pn) { - - vm_map_t map; - kern_return_t ret; - vm_offset_t source, sink; - vm_offset_t vaddr; - vm_offset_t paddr; - spl_t s; - unsigned int lop, csize; - int needtran, bothphys; - vm_prot_t prot; - pt_entry_t *ptep; - - map = (which & cppvKmap) ? 
kernel_map : current_map_fast(); + pmap_set_modify(pn); +} - source = low32(src64); - sink = low32(snk64); +void +mapping_set_ref(ppnum_t pn) +{ + pmap_set_reference(pn); +} - if((which & (cppvPsrc | cppvPsnk)) == 0 ) { /* Make sure that only one is virtual */ - panic("copypv: no more than 1 parameter may be virtual\n"); /* Not allowed */ +void +cache_flush_page_phys(ppnum_t pa) +{ + mapwindow_t *map; + boolean_t istate; + int i; + unsigned char *cacheline_addr; + int cacheline_size = cpuid_info()->cache_linesize; + int cachelines_in_page = PAGE_SIZE/cacheline_size; + + __mfence(); + + istate = ml_set_interrupts_enabled(FALSE); + + map = pmap_get_mapwindow((pt_entry_t)(i386_ptob(pa) | INTEL_PTE_VALID)); + if (map == 0) { + panic("cache_flush_page_phys: CMAP busy"); + } + + invlpg((uintptr_t)map->prv_CADDR); + + for (i = 0, cacheline_addr = (unsigned char *)map->prv_CADDR; + i < cachelines_in_page; + i++, cacheline_addr += cacheline_size) { + __clflush((void *) cacheline_addr); } + + *map->prv_CMAP = 0; + + (void) ml_set_interrupts_enabled(istate); + + __mfence(); +} + + +void exit_funnel_section(void) +{ + thread_t thread; + + thread = current_thread(); + + if (thread->funnel_lock) + (void) thread_funnel_set(thread->funnel_lock, FALSE); +} + + + +/* + * the copy engine has the following characteristics + * - copyio handles copies to/from user or kernel space + * - copypv deals with physical or virtual addresses + * + * implementation details as follows + * - a cache of up to NCOPY_WINDOWS is maintained per thread for + * access of user virutal space + * - the window size is determined by the amount of virtual space + * that can be mapped by a single page table + * - the mapping is done by copying the page table pointer from + * the user's directory entry corresponding to the window's + * address in user space to the directory entry corresponding + * to the window slot in the kernel's address space + * - the set of mappings is preserved across context switches, + * 
so the copy can run with pre-emption enabled + * - there is a gdt entry set up to anchor the kernel window on + * each processor + * - the copies are done using the selector corresponding to the + * gdt entry + * - the addresses corresponding to the user virtual address are + * relative to the beginning of the window being used to map + * that region... thus the thread can be pre-empted and switched + * to a different processor while in the midst of a copy + * - the window caches must be invalidated if the pmap changes out + * from under the thread... this can happen during vfork/exec... + * inval_copy_windows is the invalidation routine to be used + * - the copyio engine has 4 different states associated with it + * that allows for lazy tlb flushes and the ability to avoid + * a flush all together if we've just come from user space + * the 4 states are as follows... + * + * WINDOWS_OPENED - set by copyio to indicate to the context + * switch code that it is necessary to do a tlbflush after + * switching the windows since we're in the middle of a copy + * + * WINDOWS_CLOSED - set by copyio to indicate that it's done + * using the windows, so that the context switch code need + * not do the tlbflush... instead it will set the state to... + * + * WINDOWS_DIRTY - set by the context switch code to indicate + * to the copy engine that it is responsible for doing a + * tlbflush before using the windows again... it's also + * set by the inval_copy_windows routine to indicate the + * same responsibility. + * + * WINDOWS_CLEAN - set by the return to user path to indicate + * that a tlbflush has happened and that there is no need + * for copyio to do another when it is entered next... 
+ * + * - a window for mapping single physical pages is provided for copypv + * - this window is maintained across context switches and has the + * same characteristics as the user space windows w/r to pre-emption + */ + +extern int copyout_user(const char *, vm_offset_t, vm_size_t); +extern int copyout_kern(const char *, vm_offset_t, vm_size_t); +extern int copyin_user(const vm_offset_t, char *, vm_size_t); +extern int copyin_kern(const vm_offset_t, char *, vm_size_t); +extern int copyoutphys_user(const char *, vm_offset_t, vm_size_t); +extern int copyoutphys_kern(const char *, vm_offset_t, vm_size_t); +extern int copyinphys_user(const vm_offset_t, char *, vm_size_t); +extern int copyinphys_kern(const vm_offset_t, char *, vm_size_t); +extern int copyinstr_user(const vm_offset_t, char *, vm_size_t, vm_size_t *); +extern int copyinstr_kern(const vm_offset_t, char *, vm_size_t, vm_size_t *); + +static int copyio(int, user_addr_t, char *, vm_size_t, vm_size_t *, int); +static int copyio_phys(addr64_t, addr64_t, vm_size_t, int); + + +#define COPYIN 0 +#define COPYOUT 1 +#define COPYINSTR 2 +#define COPYINPHYS 3 +#define COPYOUTPHYS 4 + + + +void inval_copy_windows(thread_t thread) +{ + int i; - bothphys = 1; /* Assume both are physical */ - - if(!(which & cppvPsnk)) { /* Is there a virtual page here? 
*/ - vaddr = sink; /* Sink side is virtual */ - bothphys = 0; /* Show both aren't physical */ - prot = VM_PROT_READ | VM_PROT_WRITE; /* Sink always must be read/write */ - } else /* if(!(which & cppvPsrc)) */ { /* Source side is virtual */ - vaddr = source; /* Source side is virtual */ - bothphys = 0; /* Show both aren't physical */ - prot = VM_PROT_READ; /* Virtual source is always read only */ + for (i = 0; i < NCOPY_WINDOWS; i++) { + thread->machine.copy_window[i].user_base = -1; } + thread->machine.nxt_window = 0; + thread->machine.copyio_state = WINDOWS_DIRTY; - needtran = 1; /* Show we need to map the virtual the first time */ - s = splhigh(); /* Don't bother me */ - - while(size) { - - if(!bothphys && (needtran || !(vaddr & 4095LL))) { /* If first time or we stepped onto a new page, we need to translate */ - needtran = 0; - while(1) { - ptep = pmap_mapgetpte(map, vaddr); - if((0 == ptep) || ((*ptep & INTEL_PTE_VALID) == 0)) { - splx(s); /* Restore the interrupt level */ - ret = vm_fault(map, vm_map_trunc_page(vaddr), prot, FALSE, THREAD_UNINT, NULL, 0); /* Didn't find it, try to fault it in... */ - - if(ret != KERN_SUCCESS)return KERN_FAILURE; /* Didn't find any, return no good... */ - - s = splhigh(); /* Don't bother me */ - continue; /* Go try for the map again... */ - - } - - /* Note that we have to have the destination writable. So, if we already have it, or we are mapping the source, - we can just leave. - */ - if((which & cppvPsnk) || (*ptep & INTEL_PTE_WRITE)) break; /* We got it mapped R/W or the source is not virtual, leave... */ - splx(s); /* Restore the interrupt level */ - - ret = vm_fault(map, vm_map_trunc_page(vaddr), VM_PROT_READ | VM_PROT_WRITE, FALSE, THREAD_UNINT, NULL, 0); /* check for a COW area */ - if (ret != KERN_SUCCESS) return KERN_FAILURE; /* We couldn't get it R/W, leave in disgrace... 
*/ - s = splhigh(); /* Don't bother me */ - } + KERNEL_DEBUG(0xeff70058 | DBG_FUNC_NONE, (int)thread, (int)thread->map, 0, 0, 0); +} - paddr = pte_to_pa(*ptep) | (vaddr & 4095); - - if(which & cppvPsrc) sink = paddr; /* If source is physical, then the sink is virtual */ - else source = paddr; /* Otherwise the source is */ + +static int +copyio(int copy_type, user_addr_t user_addr, char *kernel_addr, vm_size_t nbytes, vm_size_t *lencopied, int use_kernel_map) +{ + thread_t thread; + pmap_t pmap; + pt_entry_t *updp; + pt_entry_t *kpdp; + user_addr_t user_base; + vm_offset_t user_offset; + vm_offset_t kern_vaddr; + vm_size_t cnt; + vm_size_t bytes_copied; + int error = 0; + int window_index; + int copyio_state; + boolean_t istate; +#if KDEBUG + int debug_type = 0xeff70010; + debug_type += (copy_type << 2); +#endif + + thread = current_thread(); + + KERNEL_DEBUG(debug_type | DBG_FUNC_START, (int)(user_addr >> 32), (int)user_addr, (int)nbytes, thread->machine.copyio_state, 0); + + if (nbytes == 0) { + KERNEL_DEBUG(debug_type | DBG_FUNC_END, (int)user_addr, (int)kernel_addr, (int)nbytes, 0, 0); + return (0); + } + pmap = thread->map->pmap; + + if (pmap == kernel_pmap || use_kernel_map) { + + kern_vaddr = (vm_offset_t)user_addr; + + switch (copy_type) { + + case COPYIN: + error = copyin_kern(kern_vaddr, kernel_addr, nbytes); + break; + + case COPYOUT: + error = copyout_kern(kernel_addr, kern_vaddr, nbytes); + break; + + case COPYINSTR: + error = copyinstr_kern(kern_vaddr, kernel_addr, nbytes, lencopied); + break; + + case COPYINPHYS: + error = copyinphys_kern(kern_vaddr, kernel_addr, nbytes); + break; + + case COPYOUTPHYS: + error = copyoutphys_kern(kernel_addr, kern_vaddr, nbytes); + break; } - - lop = (unsigned int)(4096LL - (sink & 4095LL)); /* Assume sink smallest */ - if(lop > (unsigned int)(4096LL - (source & 4095LL))) lop = (unsigned int)(4096LL - (source & 4095LL)); /* No, source is smaller */ - - csize = size; /* Assume we can copy it all */ - if(lop < size) 
csize = lop; /* Nope, we can't do it all */ - - if(which & cppvFsrc) flush_dcache64((addr64_t)source, csize, 1); /* If requested, flush source before move */ - if(which & cppvFsnk) flush_dcache64((addr64_t)sink, csize, 1); /* If requested, flush sink before move */ + KERNEL_DEBUG(debug_type | DBG_FUNC_END, (int)kern_vaddr, (int)kernel_addr, (int)nbytes, error | 0x80000000, 0); - bcopy_phys((addr64_t)source, (addr64_t)sink, csize); /* Do a physical copy, virtually */ - - if(which & cppvFsrc) flush_dcache64((addr64_t)source, csize, 1); /* If requested, flush source after move */ - if(which & cppvFsnk) flush_dcache64((addr64_t)sink, csize, 1); /* If requested, flush sink after move */ + return (error); + } + user_base = user_addr & ~((user_addr_t)(NBPDE - 1)); + user_offset = user_addr & (NBPDE - 1); + KERNEL_DEBUG(debug_type | DBG_FUNC_NONE, (int)(user_base >> 32), (int)user_base, (int)user_offset, 0, 0); -/* - * Note that for certain ram disk flavors, we may be copying outside of known memory. - * Therefore, before we try to mark it modifed, we check if it exists. 
- */ + cnt = NBPDE - user_offset; + + if (cnt > nbytes) + cnt = nbytes; + + istate = ml_set_interrupts_enabled(FALSE); + + copyio_state = thread->machine.copyio_state; + thread->machine.copyio_state = WINDOWS_OPENED; + + (void) ml_set_interrupts_enabled(istate); + + + for (;;) { - if( !(which & cppvNoModSnk)) { - if (phys_page_exists((ppnum_t)sink >> 12)) - mapping_set_mod((ppnum_t)(sink >> 12)); /* Make sure we know that it is modified */ + for (window_index = 0; window_index < NCOPY_WINDOWS; window_index++) { + if (thread->machine.copy_window[window_index].user_base == user_base) + break; } - if( !(which & cppvNoRefSrc)) { - if (phys_page_exists((ppnum_t)source >> 12)) - mapping_set_ref((ppnum_t)(source >> 12)); /* Make sure we know that it is modified */ + if (window_index >= NCOPY_WINDOWS) { + + window_index = thread->machine.nxt_window; + thread->machine.nxt_window++; + + if (thread->machine.nxt_window >= NCOPY_WINDOWS) + thread->machine.nxt_window = 0; + thread->machine.copy_window[window_index].user_base = user_base; + + /* + * it's necessary to disable pre-emption + * since I have to compute the kernel descriptor pointer + * for the new window + */ + istate = ml_set_interrupts_enabled(FALSE); + + updp = pmap_pde(pmap, user_base); + + kpdp = current_cpu_datap()->cpu_copywindow_pdp; + kpdp += window_index; + + pmap_store_pte(kpdp, updp ? 
*updp : 0); + + (void) ml_set_interrupts_enabled(istate); + + copyio_state = WINDOWS_DIRTY; + + KERNEL_DEBUG(0xeff70040 | DBG_FUNC_NONE, window_index, (int)user_base, (int)updp, (int)kpdp, 0); + } +#if JOE_DEBUG + else { + updp = pmap_pde(pmap, user_base); + istate = ml_set_interrupts_enabled(FALSE); - size = size - csize; /* Calculate what is left */ - vaddr = vaddr + csize; /* Move to next sink address */ - source = source + csize; /* Bump source to next physical address */ - sink = sink + csize; /* Bump sink to next physical address */ + kpdp = current_cpu_datap()->cpu_copywindow_pdp; + + kpdp += window_index; + + if ((*kpdp & PG_FRAME) != (*updp & PG_FRAME)) { + panic("copyio: user pdp mismatch - kpdp = 0x%x, updp = 0x%x\n", kpdp, updp); + } + (void) ml_set_interrupts_enabled(istate); + } +#endif + if (copyio_state == WINDOWS_DIRTY) { + flush_tlb(); + + copyio_state = WINDOWS_CLEAN; + + KERNEL_DEBUG(0xeff70054 | DBG_FUNC_NONE, window_index, 0, 0, 0, 0); + } + user_offset += (window_index * NBPDE); + + KERNEL_DEBUG(0xeff70044 | DBG_FUNC_NONE, (int)user_offset, (int)kernel_addr, cnt, 0, 0); + + switch (copy_type) { + + case COPYIN: + error = copyin_user(user_offset, kernel_addr, cnt); + break; + + case COPYOUT: + error = copyout_user(kernel_addr, user_offset, cnt); + break; + + case COPYINPHYS: + error = copyinphys_user(user_offset, kernel_addr, cnt); + break; + + case COPYOUTPHYS: + error = copyoutphys_user(kernel_addr, user_offset, cnt); + break; + + case COPYINSTR: + error = copyinstr_user(user_offset, kernel_addr, cnt, &bytes_copied); + + /* + * lencopied should be updated on success + * or ENAMETOOLONG... 
but not EFAULT + */ + if (error != EFAULT) + *lencopied += bytes_copied; + + /* + * if we still have room, then the ENAMETOOLONG + * is just an artifact of the buffer straddling + * a window boundary and we should continue + */ + if (error == ENAMETOOLONG && nbytes > cnt) + error = 0; + + if (error) { +#if KDEBUG + nbytes = *lencopied; +#endif + break; + } + if (*(kernel_addr + bytes_copied - 1) == 0) { + /* + * we found a NULL terminator... we're done + */ +#if KDEBUG + nbytes = *lencopied; +#endif + goto done; + } + if (cnt == nbytes) { + /* + * no more room in the buffer and we haven't + * yet come across a NULL terminator + */ +#if KDEBUG + nbytes = *lencopied; +#endif + error = ENAMETOOLONG; + break; + } + assert(cnt == bytes_copied); + + break; + } + if (error) + break; + if ((nbytes -= cnt) == 0) + break; + + kernel_addr += cnt; + user_base += NBPDE; + user_offset = 0; + + if (nbytes > NBPDE) + cnt = NBPDE; + else + cnt = nbytes; } - - splx(s); /* Open up for interrupts */ +done: + thread->machine.copyio_state = WINDOWS_CLOSED; - return KERN_SUCCESS; + KERNEL_DEBUG(debug_type | DBG_FUNC_END, (int)user_addr, (int)kernel_addr, (int)nbytes, error, 0); + + return (error); } -void switch_to_serial_console(void) + +static int +copyio_phys(addr64_t source, addr64_t sink, vm_size_t csize, int which) { + pmap_paddr_t paddr; + user_addr_t vaddr; + char *window_offset; + pt_entry_t pentry; + int ctype; + int retval; + boolean_t istate; + + if (which & cppvPsnk) { + paddr = (pmap_paddr_t)sink; + vaddr = (user_addr_t)source; + ctype = COPYINPHYS; + pentry = (pt_entry_t)(INTEL_PTE_VALID | (paddr & PG_FRAME) | INTEL_PTE_RW); + } else { + paddr = (pmap_paddr_t)source; + vaddr = (user_addr_t)sink; + ctype = COPYOUTPHYS; + pentry = (pt_entry_t)(INTEL_PTE_VALID | (paddr & PG_FRAME)); + } + window_offset = (char *)((uint32_t)paddr & (PAGE_SIZE - 1)); + + if (current_thread()->machine.physwindow_busy) { + pt_entry_t old_pentry; + + KERNEL_DEBUG(0xeff70048 | DBG_FUNC_NONE, paddr, 
csize, 0, -1, 0); + /* + * we had better be targeting wired memory at this point + * we will not be able to handle a fault with interrupts + * disabled... we disable them because we can't tolerate + * being preempted during this nested use of the window + */ + istate = ml_set_interrupts_enabled(FALSE); + + old_pentry = *(current_cpu_datap()->cpu_physwindow_ptep); + pmap_store_pte((current_cpu_datap()->cpu_physwindow_ptep), pentry); + + invlpg((uintptr_t)current_cpu_datap()->cpu_physwindow_base); + + retval = copyio(ctype, vaddr, window_offset, csize, NULL, which & cppvKmap); + + pmap_store_pte((current_cpu_datap()->cpu_physwindow_ptep), old_pentry); + + invlpg((uintptr_t)current_cpu_datap()->cpu_physwindow_base); + + (void) ml_set_interrupts_enabled(istate); + } else { + /* + * mark the window as in use... if an interrupt hits while we're + * busy, or we trigger another coyppv from the fault path into + * the driver on a user address space page fault due to a copyin/out + * then we need to save and restore the current window state instead + * of caching the window preserving it across context switches + */ + current_thread()->machine.physwindow_busy = 1; + + if (current_thread()->machine.physwindow_pte != pentry) { + KERNEL_DEBUG(0xeff70048 | DBG_FUNC_NONE, paddr, csize, 0, 0, 0); + + current_thread()->machine.physwindow_pte = pentry; + + /* + * preemption at this point would be bad since we + * could end up on the other processor after we grabbed the + * pointer to the current cpu data area, but before we finished + * using it to stuff the page table entry since we would + * be modifying a window that no longer belonged to us + * the invlpg can be done unprotected since it only flushes + * this page address from the tlb... 
if it flushes the wrong + * one, no harm is done, and the context switch that moved us + * to the other processor will have already take care of + * flushing the tlb after it reloaded the page table from machine.physwindow_pte + */ + istate = ml_set_interrupts_enabled(FALSE); + *(current_cpu_datap()->cpu_physwindow_ptep) = pentry; + (void) ml_set_interrupts_enabled(istate); + + invlpg((uintptr_t)current_cpu_datap()->cpu_physwindow_base); + } +#if JOE_DEBUG + else { + if (pentry != + (*(current_cpu_datap()->cpu_physwindow_ptep) & (INTEL_PTE_VALID | PG_FRAME | INTEL_PTE_RW))) + panic("copyio_phys: pentry != *physwindow_ptep"); + } +#endif + retval = copyio(ctype, vaddr, window_offset, csize, NULL, which & cppvKmap); + + current_thread()->machine.physwindow_busy = 0; + } + return (retval); } -addr64_t vm_last_addr; -void -mapping_set_mod(ppnum_t pn) + +int +copyinmsg(const user_addr_t user_addr, char *kernel_addr, vm_size_t nbytes) { - pmap_set_modify(pn); -} + return (copyio(COPYIN, user_addr, kernel_addr, nbytes, NULL, 0)); +} -void -mapping_set_ref(ppnum_t pn) +int +copyin(const user_addr_t user_addr, char *kernel_addr, vm_size_t nbytes) { - pmap_set_reference(pn); + return (copyio(COPYIN, user_addr, kernel_addr, nbytes, NULL, 0)); } -void -cache_flush_page_phys(ppnum_t pa) +int +copyinstr(const user_addr_t user_addr, char *kernel_addr, vm_size_t nbytes, vm_size_t *lencopied) { - boolean_t istate; - int i; - unsigned int *cacheline_addr; - int cacheline_size = cpuid_info()->cache_linesize; - int cachelines_in_page = PAGE_SIZE/cacheline_size; + *lencopied = 0; - /* - * If there's no clflush instruction, we're sadly forced to use wbinvd. 
- */ - if (!(cpuid_features() & CPUID_FEATURE_CLFSH)) { - asm volatile("wbinvd" : : : "memory"); - return; - } + return (copyio(COPYINSTR, user_addr, kernel_addr, nbytes, lencopied, 0)); +} - istate = ml_set_interrupts_enabled(FALSE); +int +copyoutmsg(const char *kernel_addr, user_addr_t user_addr, vm_size_t nbytes) +{ + return (copyio(COPYOUT, user_addr, (char *)kernel_addr, nbytes, NULL, 0)); +} - if (*(pt_entry_t *) CM2) - panic("cache_flush_page_phys: CMAP busy"); +int +copyout(const char *kernel_addr, user_addr_t user_addr, vm_size_t nbytes) +{ + return (copyio(COPYOUT, user_addr, (char *)kernel_addr, nbytes, NULL, 0)); +} - *(pt_entry_t *) CM2 = i386_ptob(pa) | INTEL_PTE_VALID; - invlpg((u_int)CA2); - for (i = 0, cacheline_addr = (unsigned int *)CA2; - i < cachelines_in_page; - i++, cacheline_addr += cacheline_size) { - asm volatile("clflush %0" : : "m" (cacheline_addr)); - } +kern_return_t copypv(addr64_t src64, addr64_t snk64, unsigned int size, int which) +{ + unsigned int lop, csize; + int bothphys = 0; + - *(pt_entry_t *) CM2 = 0; + KERNEL_DEBUG(0xeff7004c | DBG_FUNC_START, (int)src64, (int)snk64, size, which, 0); - (void) ml_set_interrupts_enabled(istate); + if ((which & (cppvPsrc | cppvPsnk)) == 0 ) /* Make sure that only one is virtual */ + panic("copypv: no more than 1 parameter may be virtual\n"); /* Not allowed */ -} + if ((which & (cppvPsrc | cppvPsnk)) == (cppvPsrc | cppvPsnk)) + bothphys = 1; /* both are physical */ + + while (size) { + + if (bothphys) { + lop = (unsigned int)(PAGE_SIZE - (snk64 & (PAGE_SIZE - 1))); /* Assume sink smallest */ + + if (lop > (unsigned int)(PAGE_SIZE - (src64 & (PAGE_SIZE - 1)))) + lop = (unsigned int)(PAGE_SIZE - (src64 & (PAGE_SIZE - 1))); /* No, source is smaller */ + } else { + /* + * only need to compute the resid for the physical page + * address... 
we don't care about where we start/finish in + * the virtual since we just call the normal copyin/copyout + */ + if (which & cppvPsrc) + lop = (unsigned int)(PAGE_SIZE - (src64 & (PAGE_SIZE - 1))); + else + lop = (unsigned int)(PAGE_SIZE - (snk64 & (PAGE_SIZE - 1))); + } + csize = size; /* Assume we can copy it all */ + if (lop < size) + csize = lop; /* Nope, we can't do it all */ +#if 0 + /* + * flush_dcache64 is currently a nop on the i386... + * it's used when copying to non-system memory such + * as video capture cards... on PPC there was a need + * to flush due to how we mapped this memory... not + * sure if it's needed on i386. + */ + if (which & cppvFsrc) + flush_dcache64(src64, csize, 1); /* If requested, flush source before move */ + if (which & cppvFsnk) + flush_dcache64(snk64, csize, 1); /* If requested, flush sink before move */ +#endif + if (bothphys) + bcopy_phys(src64, snk64, csize); /* Do a physical copy, virtually */ + else { + if (copyio_phys(src64, snk64, csize, which)) + return (KERN_FAILURE); + } +#if 0 + if (which & cppvFsrc) + flush_dcache64(src64, csize, 1); /* If requested, flush source after move */ + if (which & cppvFsnk) + flush_dcache64(snk64, csize, 1); /* If requested, flush sink after move */ +#endif + size -= csize; /* Calculate what is left */ + snk64 += csize; /* Bump sink to next physical address */ + src64 += csize; /* Bump source to next physical address */ + } + KERNEL_DEBUG(0xeff7004c | DBG_FUNC_END, (int)src64, (int)snk64, size, which, 0); + return KERN_SUCCESS; +} diff --git a/osfmk/i386/lowglobals.h b/osfmk/i386/lowglobals.h new file mode 100644 index 000000000..22bfed701 --- /dev/null +++ b/osfmk/i386/lowglobals.h @@ -0,0 +1,62 @@ +/* + * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * The contents of this file constitute Original Code as defined in and + * are subject to the Apple Public Source License Version 1.1 (the + * "License"). 
You may not use this file except in compliance with the + * License. Please obtain a copy of the License at + * http://www.apple.com/publicsource and read it before using this file. + * + * This Original Code and all software distributed under the License are + * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the + * License for the specific language governing rights and limitations + * under the License. + * + * @APPLE_LICENSE_HEADER_END@ + */ +/* + * Header files for the Low Memory Globals (lg) + */ +#ifndef _LOW_MEMORY_GLOBALS_H_ +#define _LOW_MEMORY_GLOBALS_H_ + +#include +#include +#include +#include + +/* + * Don't change these structures unless you change the corresponding assembly code + * which is in lowmem_vectors.s + */ + +/* + * This is where we put constants, pointers, and data areas that must be accessed + * quickly through assembler. They are designed to be accessed directly with + * absolute addresses, not via a base register. This is a global area, and not + * per processor. 
+ */ + +#pragma pack(4) /* Make sure the structure stays as we defined it */ +typedef struct lowglo { + + unsigned char lgVerCode[8]; /* 0x2000 System verification code */ + unsigned long long lgZero; /* 0x2008 Double constant 0 */ + uint32_t lgRsv010[3]; /* 0x2010 Reserved */ + uint32_t lgVersion; /* 0x201C Pointer to kernel version string */ + uint32_t lgRsv020[280]; /* 0X2020 Reserved */ + uint32_t lgKmodptr; /* 0x2480 Pointer to kmod, debugging aid */ + uint32_t lgTransOff; /* 0x2484 Pointer to kdp_trans_off, debugging aid */ + uint32_t lgRsv028; /* 0x2488 Reserved */ + uint32_t lgDevSlot1; /* 0x248C For developer use */ + uint32_t lgDevSlot2; /* 0x2490 For developer use */ + uint32_t lgRsv494[731]; /* Reserved - push to 1 page */ +} lowglo; + + +#endif /* _LOW_MEMORY_GLOBALS_H_ */ diff --git a/osfmk/i386/lowmem_vectors.s b/osfmk/i386/lowmem_vectors.s new file mode 100644 index 000000000..bf4cd2941 --- /dev/null +++ b/osfmk/i386/lowmem_vectors.s @@ -0,0 +1,83 @@ +/* + * Copyright (c) 2000-2006 Apple Computer, Inc. All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * The contents of this file constitute Original Code as defined in and + * are subject to the Apple Public Source License Version 1.1 (the + * "License"). You may not use this file except in compliance with the + * License. Please obtain a copy of the License at + * http://www.apple.com/publicsource and read it before using this file. + * + * This Original Code and all software distributed under the License are + * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the + * License for the specific language governing rights and limitations + * under the License. 
+ * + * @APPLE_LICENSE_HEADER_END@ + */ +/* + * @OSF_COPYRIGHT@ + */ +/* + * Mach Operating System + * Copyright (c) 1991,1990 Carnegie Mellon University + * All Rights Reserved. + * + * Permission to use, copy, modify and distribute this software and its + * documentation is hereby granted, provided that both the copyright + * notice and this permission notice appear in all copies of the + * software, derivative works or modified versions, and any portions + * thereof, and that both notices appear in supporting documentation. + * + * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" + * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR + * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. + * + * Carnegie Mellon requests users of this software to return to + * + * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU + * School of Computer Science + * Carnegie Mellon University + * Pittsburgh PA 15213-3890 + * + * any improvements or extensions that they make and grant Carnegie Mellon + * the rights to redistribute these changes. 
+ */ + +#include +#include + +#include +#include +#include +#include + + +/* on x86 the low mem vectors live here and get mapped to 0x2000 at + * system startup time + */ + + .text + .align 12 + + .globl EXT(lowGlo) +EXT(lowGlo): + + .ascii "Catfish " /* 0x2000 System verification code */ + .long 0 /* 0x2008 Double constant 0 */ + .long 0 + .long 0 /* 0x2010 Reserved */ + .long 0 /* 0x2014 Reserved */ + .long 0 /* 0x2018 Reserved */ + .long EXT(version) /* 0x201C Pointer to kernel version string */ + .fill 280, 4, 0 /* 0x2020 Reserved */ + .long EXT(kmod) /* 0x2480 Pointer to kmod, debugging aid */ + .long EXT(kdp_trans_off) /* 0x2484 Pointer to kdp_trans_off, debugging aid */ + .long 0 /* 0x2488 Reserved */ + .long 0 /* 0x248C Reserved for developer use */ + .long 0 /* 0x2490 Reserved for developer use */ + .fill 731, 4, 0 diff --git a/osfmk/i386/machdep_call.c b/osfmk/i386/machdep_call.c index 4e44c0559..1cebbe4e0 100644 --- a/osfmk/i386/machdep_call.c +++ b/osfmk/i386/machdep_call.c @@ -35,11 +35,6 @@ #include extern kern_return_t kern_invalid(void); -#ifdef FIXME -extern kern_return_t PCcreate(), PCldt(), PCresume(); -extern kern_return_t PCcopyBIOSData(), PCmapBIOSRom(); -extern kern_return_t PCsizeBIOSExtData(), PCcopyBIOSExtData(); -#endif machdep_call_t machdep_call_table[] = { MACHDEP_CALL_ROUTINE(thread_get_cthread_self,0), @@ -47,16 +42,19 @@ machdep_call_t machdep_call_table[] = { MACHDEP_CALL_ROUTINE(kern_invalid,0), MACHDEP_CALL_ROUTINE(thread_fast_set_cthread_self,1), MACHDEP_CALL_ROUTINE(thread_set_user_ldt,3), -#ifdef FIXME - MACHDEP_CALL_ROUTINE(PCcreate,3), - MACHDEP_CALL_ROUTINE(PCldt,3), - MACHDEP_CALL_ROUTINE(PCresume,0), - MACHDEP_CALL_ROUTINE(PCcopyBIOSData,1), - MACHDEP_CALL_ROUTINE(PCsizeBIOSExtData,0), - MACHDEP_CALL_ROUTINE(PCcopyBIOSExtData,1), - MACHDEP_CALL_ROUTINE(PCmapBIOSRom,3), -#endif + MACHDEP_BSD_CALL_ROUTINE(i386_set_ldt,3), + MACHDEP_BSD_CALL_ROUTINE(i386_get_ldt,3), +}; +machdep_call_t machdep_call_table64[] = { + 
MACHDEP_CALL_ROUTINE(kern_invalid,0), + MACHDEP_CALL_ROUTINE(kern_invalid,0), + MACHDEP_CALL_ROUTINE(kern_invalid,0), + MACHDEP_CALL_ROUTINE64(thread_fast_set_cthread_self64,1), + MACHDEP_CALL_ROUTINE(kern_invalid,0), + MACHDEP_CALL_ROUTINE(kern_invalid,0), + MACHDEP_CALL_ROUTINE(kern_invalid,0), }; int machdep_call_count = (sizeof (machdep_call_table) / sizeof (machdep_call_t)); + diff --git a/osfmk/i386/machdep_call.h b/osfmk/i386/machdep_call.h index 6476900ea..cab1fb7e2 100644 --- a/osfmk/i386/machdep_call.h +++ b/osfmk/i386/machdep_call.h @@ -33,27 +33,42 @@ typedef union { kern_return_t (*args_0)(void); kern_return_t (*args_1)(uint32_t); + kern_return_t (*args64_1)(uint64_t); kern_return_t (*args_2)(uint32_t,uint32_t); kern_return_t (*args_3)(uint32_t,uint32_t,uint32_t); - kern_return_t (*args_4)(uint32_t, uint32_t,uint32_t,uint32_t); + kern_return_t (*args_4)(uint32_t,uint32_t,uint32_t,uint32_t); kern_return_t (*args_var)(uint32_t,...); + int (*args_bsd_3)(int *, uint32_t,uint32_t,uint32_t); } machdep_call_routine_t; -#define MACHDEP_CALL_ROUTINE(func,args) \ - { { .args_ ## args = func }, args } +#define MACHDEP_CALL_ROUTINE(func, args) \ + { { .args_ ## args = func }, args, 0 } + +#define MACHDEP_CALL_ROUTINE64(func, args) \ + { { .args64_ ## args = func }, args, 0 } + +#define MACHDEP_BSD_CALL_ROUTINE(func, args) \ + { { .args_bsd_ ## args = func }, args, 1 } typedef struct { machdep_call_routine_t routine; int nargs; + int bsd_style; } machdep_call_t; extern machdep_call_t machdep_call_table[]; +extern machdep_call_t machdep_call_table64[]; + extern int machdep_call_count; extern kern_return_t thread_get_cthread_self(void); extern kern_return_t thread_set_cthread_self(uint32_t); extern kern_return_t thread_fast_set_cthread_self(uint32_t); +extern kern_return_t thread_fast_set_cthread_self64(uint64_t); extern kern_return_t thread_set_user_ldt(uint32_t,uint32_t,uint32_t); -extern void mach25_syscall(struct i386_saved_state *); -extern void 
machdep_syscall(struct i386_saved_state *); +extern int i386_set_ldt(int *,uint32_t,uint32_t,uint32_t); +extern int i386_get_ldt(int *,uint32_t,uint32_t,uint32_t); + +extern void machdep_syscall(x86_saved_state_t *); +extern void machdep_syscall64(x86_saved_state_t *); diff --git a/osfmk/i386/machine_cpu.h b/osfmk/i386/machine_cpu.h index 85950f68d..21ee6880b 100644 --- a/osfmk/i386/machine_cpu.h +++ b/osfmk/i386/machine_cpu.h @@ -32,9 +32,12 @@ __BEGIN_DECLS void cpu_machine_init( void); -struct i386_interrupt_state; void cpu_signal_handler( - struct i386_interrupt_state *regs); + x86_saved_state_t *regs); + +void handle_pending_TLB_flushes( + void); + kern_return_t cpu_register( int *slot_nump); @@ -42,7 +45,7 @@ __END_DECLS static inline void cpu_halt(void) { - asm volatile( "cli; hlt" ); + asm volatile( "wbinvd; cli; hlt" ); } static inline void cpu_pause(void) diff --git a/osfmk/i386/machine_routines.c b/osfmk/i386/machine_routines.c index fa5002a29..99bb21870 100644 --- a/osfmk/i386/machine_routines.c +++ b/osfmk/i386/machine_routines.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -35,15 +35,38 @@ #include #include #include +#include +#include #include +#if MACH_KDB +#include +#include +#include +#include +#include +#include +#include +#include +#endif #define MIN(a,b) ((a)<(b)? (a) : (b)) +#if DEBUG +#define DBG(x...) kprintf("DBG: " x) +#else +#define DBG(x...) 
+#endif + extern void initialize_screen(Boot_Video *, unsigned int); +extern thread_t Shutdown_context(thread_t thread, void (*doshutdown)(processor_t),processor_t processor); extern void wakeup(void *); +extern unsigned KernelRelocOffset; static int max_cpus_initialized = 0; +unsigned int LockTimeOut = 12500000; +unsigned int MutexSpin = 0; + #define MAX_CPUS_SET 0x1 #define MAX_CPUS_WAIT 0x2 @@ -54,7 +77,7 @@ vm_offset_t ml_io_map( vm_offset_t phys_addr, vm_size_t size) { - return(io_map(phys_addr,size)); + return(io_map(phys_addr,size,VM_WIMG_IO)); } /* boot memory allocation */ @@ -64,6 +87,21 @@ vm_offset_t ml_static_malloc( return((vm_offset_t)NULL); } + +void ml_get_bouncepool_info(vm_offset_t *phys_addr, vm_size_t *size) +{ + *phys_addr = bounce_pool_base; + *size = bounce_pool_size; +} + + +vm_offset_t +ml_boot_ptovirt( + vm_offset_t paddr) +{ + return (vm_offset_t)((paddr-KernelRelocOffset) | LINEAR_KERNEL_ADDRESS); +} + vm_offset_t ml_static_ptovirt( vm_offset_t paddr) @@ -84,7 +122,7 @@ ml_static_mfree( vm_offset_t vaddr_cur; ppnum_t ppn; - if (vaddr < VM_MIN_KERNEL_ADDRESS) return; +// if (vaddr < VM_MIN_KERNEL_ADDRESS) return; assert((vaddr & (PAGE_SIZE-1)) == 0); /* must be page aligned */ @@ -100,6 +138,7 @@ ml_static_mfree( } } + /* virtual to physical on wired pages */ vm_offset_t ml_vtophys( vm_offset_t vaddr) @@ -131,10 +170,21 @@ boolean_t ml_set_interrupts_enabled(boolean_t enable) __asm__ volatile("pushf; popl %0" : "=r" (flags)); - if (enable) + if (enable) { + ast_t *myast; + + myast = ast_pending(); + + if ( (get_preemption_level() == 0) && (*myast & AST_URGENT) ) { __asm__ volatile("sti"); - else + __asm__ volatile ("int $0xff"); + } else { + __asm__ volatile ("sti"); + } + } + else { __asm__ volatile("cli"); + } return (flags & EFL_IF) != 0; } @@ -191,12 +241,6 @@ void ml_install_interrupt_handler( initialize_screen(0, kPEAcquireScreen); } -static void -cpu_idle(void) -{ - __asm__ volatile("sti; hlt": : :"memory"); -} -void 
(*cpu_idle_handler)(void) = cpu_idle; void machine_idle(void) @@ -209,12 +253,15 @@ machine_idle(void) * unless kernel param idlehalt is false and no other thread * in the same core is active - if so, don't halt so that this * core doesn't go into a low-power mode. + * For 4/4, we set a null "active cr3" while idle. */ others_active = !atomic_decl_and_test( (long *) &my_core->active_threads, 1); if (idlehalt || others_active) { DBGLOG(cpu_handle, cpu_number(), MP_IDLE); - cpu_idle_handler(); + MARK_CPU_IDLE(cpu_number()); + machine_idle_cstate(); + MARK_CPU_ACTIVE(cpu_number()); DBGLOG(cpu_handle, cpu_number(), MP_UNIDLE); } else { __asm__ volatile("sti"); @@ -229,6 +276,16 @@ machine_signal_idle( cpu_interrupt(PROCESSOR_DATA(processor, slot_num)); } +thread_t +machine_processor_shutdown( + thread_t thread, + void (*doshutdown)(processor_t), + processor_t processor) +{ + fpu_save_context(thread); + return(Shutdown_context(thread, doshutdown, processor)); +} + kern_return_t ml_processor_register( cpu_id_t cpu_id, @@ -257,7 +314,13 @@ ml_processor_register( if (this_cpu_datap->cpu_console_buf == NULL) goto failed; + this_cpu_datap->cpu_chud = chudxnu_cpu_alloc(boot_cpu); + if (this_cpu_datap->cpu_chud == NULL) + goto failed; + if (!boot_cpu) { + this_cpu_datap->cpu_core = cpu_thread_alloc(target_cpu); + this_cpu_datap->cpu_pmap = pmap_cpu_alloc(boot_cpu); if (this_cpu_datap->cpu_pmap == NULL) goto failed; @@ -276,6 +339,7 @@ ml_processor_register( failed: cpu_processor_free(this_cpu_datap->cpu_processor); pmap_cpu_free(this_cpu_datap->cpu_pmap); + chudxnu_cpu_free(this_cpu_datap->cpu_chud); console_cpu_free(this_cpu_datap->cpu_console_buf); return KERN_FAILURE; } @@ -290,11 +354,15 @@ ml_cpu_get_info(ml_cpu_info_t *cpu_infop) return; /* - * Are we supporting XMM/SSE/SSE2? + * Are we supporting MMX/SSE/SSE2/SSE3? * As distinct from whether the cpu has these capabilities. 
*/ os_supports_sse = get_cr4() & CR4_XMM; - if ((cpuid_features() & CPUID_FEATURE_SSE2) && os_supports_sse) + if ((cpuid_features() & CPUID_FEATURE_MNI) && os_supports_sse) + cpu_infop->vector_unit = 6; + else if ((cpuid_features() & CPUID_FEATURE_SSE3) && os_supports_sse) + cpu_infop->vector_unit = 5; + else if ((cpuid_features() & CPUID_FEATURE_SSE2) && os_supports_sse) cpu_infop->vector_unit = 4; else if ((cpuid_features() & CPUID_FEATURE_SSE) && os_supports_sse) cpu_infop->vector_unit = 3; @@ -319,8 +387,8 @@ ml_cpu_get_info(ml_cpu_info_t *cpu_infop) } if (cpuid_infop->cache_size[L3U] > 0) { - cpu_infop->l2_settings = 1; - cpu_infop->l2_cache_size = cpuid_infop->cache_size[L3U]; + cpu_infop->l3_settings = 1; + cpu_infop->l3_cache_size = cpuid_infop->cache_size[L3U]; } else { cpu_infop->l3_settings = 0; cpu_infop->l3_cache_size = 0xFFFFFFFF; @@ -365,6 +433,34 @@ ml_get_max_cpus(void) return(machine_info.max_cpus); } +/* + * Routine: ml_init_lock_timeout + * Function: + */ +void +ml_init_lock_timeout(void) +{ + uint64_t abstime; + uint32_t mtxspin; + + /* + * XXX As currently implemented for x86, LockTimeOut should be a + * cycle (tsc) count not an absolute time (nanoseconds) - + * but it's of the right order. + */ + nanoseconds_to_absolutetime(NSEC_PER_SEC>>2, &abstime); + LockTimeOut = (unsigned int)abstime; + + if (PE_parse_boot_arg("mtxspin", &mtxspin)) { + if (mtxspin > USEC_PER_SEC>>4) + mtxspin = USEC_PER_SEC>>4; + nanoseconds_to_absolutetime(mtxspin*NSEC_PER_USEC, &abstime); + } else { + nanoseconds_to_absolutetime(10*NSEC_PER_USEC, &abstime); + } + MutexSpin = (unsigned int)abstime; +} + /* * This is called from the machine-independent routine cpu_up() * to perform machine-dependent info updates. Defer to cpu_thread_init(). @@ -426,3 +522,134 @@ current_thread(void) { return(current_thread_fast()); } + +/* + * Set the worst-case time for the C4 to C2 transition. + * The maxdelay parameter is in nanoseconds. 
+ */ + +void +ml_set_maxsnoop(uint32_t maxdelay) +{ + C4C2SnoopDelay = maxdelay; /* Set the transition time */ + machine_nap_policy(); /* Adjust the current nap state */ +} + + +/* + * Get the worst-case time for the C4 to C2 transition. Returns nanoseconds. + */ + +unsigned +ml_get_maxsnoop(void) +{ + return C4C2SnoopDelay; /* Set the transition time */ +} + + +uint32_t +ml_get_maxbusdelay(void) +{ + return maxBusDelay; +} + +/* + * Set the maximum delay time allowed for snoop on the bus. + * + * Note that this value will be compared to the amount of time that it takes + * to transition from a non-snooping power state (C4) to a snooping state (C2). + * If maxBusDelay is less than C4C2SnoopDelay, + * we will not enter the lowest power state. + */ + +void +ml_set_maxbusdelay(uint32_t mdelay) +{ + maxBusDelay = mdelay; /* Set the delay */ + machine_nap_policy(); /* Adjust the current nap state */ +} + + +boolean_t ml_is64bit(void) { + + return (cpu_mode_is64bit()); +} + + +boolean_t ml_thread_is64bit(thread_t thread) { + + return (thread_is_64bit(thread)); +} + + +boolean_t ml_state_is64bit(void *saved_state) { + + return is_saved_state64(saved_state); +} + +void ml_cpu_set_ldt(int selector) +{ + /* + * Avoid loading the LDT + * if we're setting the KERNEL LDT and it's already set. + */ + if (selector == KERNEL_LDT && + current_cpu_datap()->cpu_ldt == KERNEL_LDT) + return; + + /* + * If 64bit this requires a mode switch (and back). 
+ */ + if (cpu_mode_is64bit()) + ml_64bit_lldt(selector); + else + lldt(selector); + current_cpu_datap()->cpu_ldt = selector; +} + +void ml_fp_setvalid(boolean_t value) +{ + fp_setvalid(value); +} + +#if MACH_KDB + +/* + * Display the global msrs + * * + * ms + */ +void +db_msr(__unused db_expr_t addr, + __unused int have_addr, + __unused db_expr_t count, + __unused char *modif) +{ + + uint32_t i, msrlow, msrhigh; + + /* Try all of the first 4096 msrs */ + for (i = 0; i < 4096; i++) { + if (!rdmsr_carefully(i, &msrlow, &msrhigh)) { + db_printf("%08X - %08X.%08X\n", i, msrhigh, msrlow); + } + } + + /* Try all of the 4096 msrs at 0x0C000000 */ + for (i = 0; i < 4096; i++) { + if (!rdmsr_carefully(0x0C000000 | i, &msrlow, &msrhigh)) { + db_printf("%08X - %08X.%08X\n", + 0x0C000000 | i, msrhigh, msrlow); + } + } + + /* Try all of the 4096 msrs at 0xC0000000 */ + for (i = 0; i < 4096; i++) { + if (!rdmsr_carefully(0xC0000000 | i, &msrlow, &msrhigh)) { + db_printf("%08X - %08X.%08X\n", + 0xC0000000 | i, msrhigh, msrlow); + } + } +} + +#endif diff --git a/osfmk/i386/machine_routines.h b/osfmk/i386/machine_routines.h index aac0dd848..d9060ecf0 100644 --- a/osfmk/i386/machine_routines.h +++ b/osfmk/i386/machine_routines.h @@ -36,6 +36,24 @@ __BEGIN_DECLS +/* are we a 64 bit platform ? */ + +boolean_t ml_is64bit(void); + +/* is this a 64bit thread? */ + +boolean_t ml_thread_is64bit(thread_t); + +/* is this a 64bit thread? 
*/ + +boolean_t ml_state_is64bit(void *); + +/* set state of fpu save area for signal handling */ + +void ml_fp_setvalid(boolean_t); + +void ml_cpu_set_ldt(int); + /* Interrupt handling */ /* Initialize Interrupts */ @@ -54,6 +72,7 @@ boolean_t ml_at_interrupt_context(void); void ml_cause_interrupt(void); void ml_get_timebase(unsigned long long *timestamp); +void ml_init_lock_timeout(void); /* Type for the Time Base Enable function */ typedef void (*time_base_enable_t)(cpu_id_t cpu_id, boolean_t enable); @@ -95,6 +114,12 @@ vm_offset_t ml_static_ptovirt( vm_offset_t); +#ifdef XNU_KERNEL_PRIVATE +vm_offset_t +ml_boot_ptovirt( + vm_offset_t); +#endif + /* PCI config cycle probing */ boolean_t ml_probe_read( vm_offset_t paddr, @@ -199,6 +224,15 @@ vm_offset_t ml_io_map( vm_offset_t ml_static_malloc( vm_size_t size); + +extern uint32_t bounce_pool_base; +extern uint32_t bounce_pool_size; + +void ml_get_bouncepool_info( + vm_offset_t *phys_addr, + vm_size_t *size); + + #endif /* PEXPERT_KERNEL_PRIVATE || MACH_KERNEL_PRIVATE */ /* Zero bytes starting at a physical address */ @@ -231,6 +265,14 @@ extern int set_be_bit(void); extern int clr_be_bit(void); extern int be_tracing(void); +extern void ml_set_maxsnoop(uint32_t maxdelay); +extern unsigned ml_get_maxsnoop(void); +extern void ml_set_maxbusdelay(uint32_t mdelay); +extern uint32_t ml_get_maxbusdelay(void); +extern void ml_hpet_cfg(uint32_t cpu, uint32_t hpetVect); + +extern uint64_t tmrCvt(uint64_t time, uint64_t conversion); + #endif /* __APPLE_API_PRIVATE */ __END_DECLS diff --git a/osfmk/i386/machine_routines_asm.s b/osfmk/i386/machine_routines_asm.s index f18d06c9f..b017f473a 100644 --- a/osfmk/i386/machine_routines_asm.s +++ b/osfmk/i386/machine_routines_asm.s @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved. 
* * @APPLE_LICENSE_HEADER_START@ * @@ -19,7 +19,25 @@ * * @APPLE_LICENSE_HEADER_END@ */ + #include +#include +#include + +#include +#include +#include + +#define PA(addr) (addr) +#define VA(addr) (addr) + +/* + * GAS won't handle an intersegment jump with a relocatable offset. + */ +#define LJMP(segment,address) \ + .byte 0xea ;\ + .long address ;\ + .word segment /* ** ml_get_timebase() @@ -31,12 +49,148 @@ */ ENTRY(ml_get_timebase) - movl S_ARG0, %ecx + movl S_ARG0, %ecx + + rdtsc + + movl %edx, 0(%ecx) + movl %eax, 4(%ecx) + + ret + + +/* + * Convert between various timer units + * + * uint64_t tmrCvt(uint64_t time, uint64_t *conversion) + * + * This code converts 64-bit time units to other units. + * For example, the TSC is converted to HPET units. + * + * Time is a 64-bit integer that is some number of ticks. + * Conversion is 64-bit fixed point number which is composed + * of a 32 bit integer and a 32 bit fraction. + * + * The time ticks are multiplied by the conversion factor. The + * calculations are done as a 128-bit value but both the high + * and low words are dropped. The high word is overflow and the + * low word is the fraction part of the result. + * + * We return a 64-bit value. + * + * Note that we can use this function to multiply 2 conversion factors. + * We do this in order to calculate the multiplier used to convert + * directly between any two units. 
+ * + */ + + .globl EXT(tmrCvt) + .align FALIGN + +LEXT(tmrCvt) + + pushl %ebp // Save a volatile + movl %esp,%ebp // Get the parameters - 8 + pushl %ebx // Save a volatile + pushl %esi // Save a volatile + pushl %edi // Save a volatile + +// %ebp + 8 - low-order ts +// %ebp + 12 - high-order ts +// %ebp + 16 - low-order cvt +// %ebp + 20 - high-order cvt + + movl 8(%ebp),%eax // Get low-order ts + mull 16(%ebp) // Multiply by low-order conversion + movl %edx,%edi // Need to save only the high order part + + movl 12(%ebp),%eax // Get the high-order ts + mull 16(%ebp) // Multiply by low-order conversion + addl %eax,%edi // Add in the overflow from the low x low calculation + adcl $0,%edx // Add in any overflow to high high part + movl %edx,%esi // Save high high part + +// We now have the upper 64 bits of the 96 bit multiply of ts and the low half of cvt +// in %esi:%edi + + movl 8(%ebp),%eax // Get low-order ts + mull 20(%ebp) // Multiply by high-order conversion + movl %eax,%ebx // Need to save the low order part + movl %edx,%ecx // Need to save the high order part + + movl 12(%ebp),%eax // Get the high-order ts + mull 20(%ebp) // Multiply by high-order conversion + +// Now have %ecx:%ebx as low part of high low and %edx:%eax as high part of high high +// We don't care about the highest word since it is overflow + + addl %edi,%ebx // Add the low words + adcl %ecx,%esi // Add in the high plus carry from low + addl %eax,%esi // Add in the rest of the high + + movl %ebx,%eax // Pass back low word + movl %esi,%edx // and the high word + + popl %edi // Restore a volatile + popl %esi // Restore a volatile + popl %ebx // Restore a volatile + popl %ebp // Restore a volatile + + ret // Leave... 
+ + .globl EXT(rtc_nanotime_store) + .align FALIGN + +LEXT(rtc_nanotime_store) + push %ebp + mov %esp,%ebp + + mov 32(%ebp),%edx + + mov 8(%ebp),%eax + mov %eax,RNT_TSC_BASE(%edx) + mov 12(%ebp),%eax + mov %eax,RNT_TSC_BASE+4(%edx) + + mov 24(%ebp),%eax + mov %eax,RNT_SCALE(%edx) + + mov 28(%ebp),%eax + mov %eax,RNT_SHIFT(%edx) + + mov 16(%ebp),%eax + mov %eax,RNT_NS_BASE(%edx) + mov 20(%ebp),%eax + mov %eax,RNT_NS_BASE+4(%edx) + + pop %ebp + ret + + .globl EXT(rtc_nanotime_load) + .align FALIGN + +LEXT(rtc_nanotime_load) + push %ebp + mov %esp,%ebp + + mov 8(%ebp),%ecx + mov 12(%ebp),%edx + + mov RNT_TSC_BASE(%ecx),%eax + mov %eax,RNT_TSC_BASE(%edx) + mov RNT_TSC_BASE+4(%ecx),%eax + mov %eax,RNT_TSC_BASE+4(%edx) - rdtsc + mov RNT_SCALE(%ecx),%eax + mov %eax,RNT_SCALE(%edx) - movl %edx, 0(%ecx) - movl %eax, 4(%ecx) + mov RNT_SHIFT(%ecx),%eax + mov %eax,RNT_SHIFT(%edx) - ret + mov RNT_NS_BASE(%ecx),%eax + mov %eax,RNT_NS_BASE(%edx) + mov RNT_NS_BASE+4(%ecx),%eax + mov %eax,RNT_NS_BASE+4(%edx) + pop %ebp + ret diff --git a/osfmk/i386/misc_protos.h b/osfmk/i386/misc_protos.h index dfa24f8a2..1d17ca4da 100644 --- a/osfmk/i386/misc_protos.h +++ b/osfmk/i386/misc_protos.h @@ -23,11 +23,26 @@ * @OSF_COPYRIGHT@ */ +#ifndef _I386_MISC_PROTOS_H_ +#define _I386_MISC_PROTOS_H_ + #include -extern void i386_preinit(void); -extern void i386_init(void); -extern void i386_vm_init(unsigned int, struct KernelBootArgs *); +struct boot_args; +struct cpu_data; + +extern void i386_init(vm_offset_t); +extern void i386_macho_zerofill(void); +extern void i386_vm_init( + uint64_t maxmem, + boolean_t IA32e, + struct boot_args *args); +extern void cpu_IA32e_enable(struct cpu_data *); +extern void cpu_IA32e_disable(struct cpu_data *); +extern void ml_load_desc64(void); +extern void ml_64bit_wrmsr64(uint32_t msr, uint64_t value); +extern void cpu_window_init(int); +extern void ml_64bit_lldt(int); extern void machine_startup(void); @@ -44,6 +59,9 @@ extern void cpu_shutdown(void); extern void 
fix_desc( void * desc, int num_desc); +extern void fix_desc64( + void * desc, + int num_desc); extern void cnpollc( boolean_t on); extern void form_pic_mask(void); @@ -57,9 +75,6 @@ extern void blkclr( const char *from, int nbytes); -extern void kdb_kintr(void); -extern void kdb_console(void); - extern unsigned int div_scale( unsigned int dividend, unsigned int divisor, @@ -84,11 +99,13 @@ extern void dcache_incoherent_io_store64(addr64_t pa, unsigned int count); extern processor_t cpu_processor_alloc(boolean_t is_boot_cpu); extern void cpu_processor_free(processor_t proc); +extern void *chudxnu_cpu_alloc(boolean_t is_boot_cpu); +extern void chudxnu_cpu_free(void *); + extern void sysclk_gettime_interrupts_disabled( mach_timespec_t *cur_time); - -extern void rtclock_intr(struct i386_interrupt_state *regs); +extern void rtc_nanotime_init_commpage(void); extern void rtc_sleep_wakeup(void); @@ -98,5 +115,9 @@ extern void rtc_clock_stepping( extern void rtc_clock_stepped( uint32_t new_frequency, uint32_t old_frequency); +extern void rtc_clock_napped( + uint64_t); extern void x86_lowmem_free(void); + +#endif /* _I386_MISC_PROTOS_H_ */ diff --git a/osfmk/i386/mp.c b/osfmk/i386/mp.c index 1edc74a25..5772db42f 100644 --- a/osfmk/i386/mp.c +++ b/osfmk/i386/mp.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved. 
* * @APPLE_LICENSE_HEADER_START@ * @@ -39,6 +39,7 @@ #include #include #include +#include #include #include @@ -51,7 +52,6 @@ #include #include #include -#include #include #include #include @@ -61,6 +61,25 @@ #include #include #include +#include +#include +#include +#include + +#include +#include + +#include +#if MACH_KDB +#include +#include +#include +#include +#include +#include +#include +#include +#endif #if MP_DEBUG #define PAUSE delay(1000000) @@ -70,24 +89,6 @@ #define PAUSE #endif /* MP_DEBUG */ -/* - * By default, use high vectors to leave vector space for systems - * with multiple I/O APIC's. However some systems that boot with - * local APIC disabled will hang in SMM when vectors greater than - * 0x5F are used. Those systems are not expected to have I/O APIC - * so 16 (0x50 - 0x40) vectors for legacy PIC support is perfect. - */ -#define LAPIC_DEFAULT_INTERRUPT_BASE 0xD0 -#define LAPIC_REDUCED_INTERRUPT_BASE 0x50 -/* - * Specific lapic interrupts are relative to this base: - */ -#define LAPIC_PERFCNT_INTERRUPT 0xB -#define LAPIC_TIMER_INTERRUPT 0xC -#define LAPIC_SPURIOUS_INTERRUPT 0xD -#define LAPIC_INTERPROCESSOR_INTERRUPT 0xE -#define LAPIC_ERROR_INTERRUPT 0xF - /* Initialize lapic_id so cpu_number() works on non SMP systems */ unsigned long lapic_id_initdata = 0; unsigned long lapic_id = (unsigned long)&lapic_id_initdata; @@ -95,6 +96,7 @@ vm_offset_t lapic_start; static i386_intr_func_t lapic_timer_func; static i386_intr_func_t lapic_pmi_func; +static i386_intr_func_t lapic_thermal_func; /* TRUE if local APIC was enabled by the OS not by the BIOS */ static boolean_t lapic_os_enabled = FALSE; @@ -104,9 +106,19 @@ int lapic_interrupt_base = LAPIC_DEFAULT_INTERRUPT_BASE; void slave_boot_init(void); +#if MACH_KDB +static void mp_kdb_wait(void); +volatile boolean_t mp_kdb_trap = FALSE; +volatile long mp_kdb_ncpus = 0; +#endif + static void mp_kdp_wait(void); static void mp_rendezvous_action(void); +static int NMIInterruptHandler(void *regs); +static 
boolean_t cpu_signal_pending(int cpu, mp_event_t event); +static void cpu_NMI_interrupt(int cpu); + boolean_t smp_initialized = FALSE; decl_simple_lock_data(,mp_kdp_lock); @@ -119,7 +131,7 @@ static void (*mp_rv_action_func)(void *arg); static void (*mp_rv_teardown_func)(void *arg); static void *mp_rv_func_arg; static int mp_rv_ncpus; -static long mp_rv_waiters[2]; +static volatile long mp_rv_waiters[2]; decl_simple_lock_data(,mp_rv_lock); int lapic_to_cpu[MAX_CPUS]; @@ -143,6 +155,24 @@ lapic_cpu_map(int apic_id, int cpu) lapic_to_cpu[apic_id] = cpu; } +/* + * Retrieve the local apic ID a cpu. + * + * Returns the local apic ID for the given processor. + * If the processor does not exist or apic not configured, returns -1. + */ + +uint32_t +ml_get_apicid(uint32_t cpu) +{ + if(cpu >= (uint32_t)MAX_CPUS) + return 0xFFFFFFFF; /* Return -1 if cpu too big */ + + /* Return the apic ID (or -1 if not configured) */ + return (uint32_t)cpu_to_lapic[cpu]; + +} + #ifdef MP_DEBUG static void lapic_cpu_map_dump(void) @@ -169,18 +199,6 @@ lapic_cpu_map_dump(void) #define LAPIC_DUMP() #endif /* MP_DEBUG */ -#define LAPIC_REG(reg) \ - (*((volatile int *)(lapic_start + LAPIC_##reg))) -#define LAPIC_REG_OFFSET(reg,off) \ - (*((volatile int *)(lapic_start + LAPIC_##reg + (off)))) - -#define LAPIC_VECTOR(src) \ - (lapic_interrupt_base + LAPIC_##src##_INTERRUPT) - -#define LAPIC_ISR_IS_SET(base,src) \ - (LAPIC_REG_OFFSET(ISR_BASE,((base+LAPIC_##src##_INTERRUPT)/32)*0x10) & \ - (1 <<((base + LAPIC_##src##_INTERRUPT)%32))) - #if GPROF /* * Initialize dummy structs for profiling. 
These aren't used but @@ -200,8 +218,6 @@ struct profile_vars *_profile_vars_cpus[MAX_CPUS] = { &_profile_vars }; #define GPROF_INIT() #endif /* GPROF */ -extern void master_up(void); - void smp_init(void) { @@ -236,16 +252,20 @@ smp_init(void) /* Establish a map to the local apic */ lapic_start = vm_map_min(kernel_map); result = vm_map_find_space(kernel_map, &lapic_start, - round_page(LAPIC_SIZE), 0, &entry); + round_page(LAPIC_SIZE), 0, + VM_MAKE_TAG(VM_MEMORY_IOKIT), &entry); if (result != KERN_SUCCESS) { panic("smp_init: vm_map_find_entry FAILED (err=%d)", result); } vm_map_unlock(kernel_map); +/* Map in the local APIC non-cacheable, as recommended by Intel + * in section 8.4.1 of the "System Programming Guide". + */ pmap_enter(pmap_kernel(), lapic_start, (ppnum_t) i386_btop(lapic_base), - VM_PROT_READ|VM_PROT_WRITE, - VM_WIMG_USE_DEFAULT, + VM_PROT_READ|VM_PROT_WRITE, + VM_WIMG_IO, TRUE); lapic_id = (unsigned long)(lapic_start + LAPIC_ID); @@ -256,19 +276,16 @@ smp_init(void) /* Set up the lapic_id <-> cpu_number map and add this boot processor */ lapic_cpu_map_init(); lapic_cpu_map((LAPIC_REG(ID)>>LAPIC_ID_SHIFT)&LAPIC_ID_MASK, 0); + kprintf("Boot cpu local APIC id 0x%x\n", cpu_to_lapic[0]); lapic_init(); cpu_thread_init(); - if (pmc_init() != KERN_SUCCESS) - printf("Performance counters not available\n"); - GPROF_INIT(); DBGLOG_CPU_INIT(master_cpu); slave_boot_init(); - master_up(); smp_initialized = TRUE; @@ -336,6 +353,11 @@ lapic_dump(void) DM[(LAPIC_REG(LVT_PERFCNT)>>LAPIC_LVT_DM_SHIFT)&LAPIC_LVT_DM_MASK], (LAPIC_REG(LVT_PERFCNT)&LAPIC_LVT_DS_PENDING)?"SendPending":"Idle", BOOL(LAPIC_REG(LVT_PERFCNT)&LAPIC_LVT_MASKED)); + kprintf("LVT_THERMAL: Vector 0x%02x [%s] %s %cmasked\n", + LAPIC_REG(LVT_THERMAL)&LAPIC_LVT_VECTOR_MASK, + DM[(LAPIC_REG(LVT_THERMAL)>>LAPIC_LVT_DM_SHIFT)&LAPIC_LVT_DM_MASK], + (LAPIC_REG(LVT_THERMAL)&LAPIC_LVT_DS_PENDING)?"SendPending":"Idle", + BOOL(LAPIC_REG(LVT_THERMAL)&LAPIC_LVT_MASKED)); kprintf("LVT_LINT0: Vector 0x%02x 
[%s][%s][%s] %s %cmasked\n", LAPIC_REG(LVT_LINT0)&LAPIC_LVT_VECTOR_MASK, DM[(LAPIC_REG(LVT_LINT0)>>LAPIC_LVT_DM_SHIFT)&LAPIC_LVT_DM_MASK], @@ -373,6 +395,26 @@ lapic_dump(void) kprintf("\n"); } +#if MACH_KDB +/* + * Displays apic junk + * + * da + */ +void +db_apic(__unused db_expr_t addr, + __unused int have_addr, + __unused db_expr_t count, + __unused char *modif) +{ + + lapic_dump(); + + return; +} + +#endif + boolean_t lapic_probe(void) { @@ -397,7 +439,7 @@ lapic_probe(void) /* * Re-initialize cpu features info and re-check. */ - set_cpu_model(); + cpuid_set_info(); if (cpuid_features() & CPUID_FEATURE_APIC) { printf("Local APIC discovered and enabled\n"); lapic_os_enabled = TRUE; @@ -445,7 +487,7 @@ lapic_shutdown(void) rdmsr(MSR_IA32_APIC_BASE, lo, hi); lo &= ~MSR_IA32_APIC_BASE_ENABLE; wrmsr(MSR_IA32_APIC_BASE, lo, hi); - set_cpu_model(); + cpuid_set_info(); mp_enable_preemption(); } @@ -478,10 +520,12 @@ lapic_init(void) /* Perfmon: unmasked */ LAPIC_REG(LVT_PERFCNT) = LAPIC_VECTOR(PERFCNT); + /* Thermal: unmasked */ + LAPIC_REG(LVT_THERMAL) = LAPIC_VECTOR(THERMAL); + lapic_esr_clear(); LAPIC_REG(LVT_ERROR) = LAPIC_VECTOR(ERROR); - } void @@ -539,6 +583,12 @@ lapic_set_pmi_func(i386_intr_func_t func) lapic_pmi_func = func; } +void +lapic_set_thermal_func(i386_intr_func_t func) +{ + lapic_thermal_func = func; +} + static inline void _lapic_end_of_interrupt(void) { @@ -552,42 +602,77 @@ lapic_end_of_interrupt(void) } int -lapic_interrupt(int interrupt, void *state) +lapic_interrupt(int interrupt, x86_saved_state_t *state) { + int retval = 0; + + /* Did we just field an interruption for the HPET comparator? */ + if(current_cpu_datap()->cpu_pmHpetVec == ((uint32_t)interrupt - 0x40)) { + /* Yes, go handle it... */ + retval = HPETInterrupt(); + /* Was it really handled? 
*/ + if(retval) { + /* If so, EOI the 'rupt */ + _lapic_end_of_interrupt(); + /* + * and then leave, + * indicating that this has been handled + */ + return 1; + } + } + interrupt -= lapic_interrupt_base; - if (interrupt < 0) - return 0; + if (interrupt < 0) { + if (interrupt == (LAPIC_NMI_INTERRUPT - lapic_interrupt_base)) { + retval = NMIInterruptHandler(state); + _lapic_end_of_interrupt(); + return retval; + } + else + return 0; + } switch(interrupt) { case LAPIC_PERFCNT_INTERRUPT: if (lapic_pmi_func != NULL) - (*lapic_pmi_func)( - (struct i386_interrupt_state *) state); + (*lapic_pmi_func)(NULL); /* Clear interrupt masked */ LAPIC_REG(LVT_PERFCNT) = LAPIC_VECTOR(PERFCNT); _lapic_end_of_interrupt(); - return 1; + retval = 1; + break; case LAPIC_TIMER_INTERRUPT: _lapic_end_of_interrupt(); if (lapic_timer_func != NULL) - (*lapic_timer_func)( - (struct i386_interrupt_state *) state); - return 1; + (*lapic_timer_func)(state); + retval = 1; + break; + case LAPIC_THERMAL_INTERRUPT: + if (lapic_thermal_func != NULL) + (*lapic_thermal_func)(NULL); + _lapic_end_of_interrupt(); + retval = 1; + break; case LAPIC_ERROR_INTERRUPT: lapic_dump(); panic("Local APIC error\n"); _lapic_end_of_interrupt(); - return 1; + retval = 1; + break; case LAPIC_SPURIOUS_INTERRUPT: kprintf("SPIV\n"); /* No EOI required here */ - return 1; + retval = 1; + break; case LAPIC_INTERPROCESSOR_INTERRUPT: - cpu_signal_handler((struct i386_interrupt_state *) state); _lapic_end_of_interrupt(); - return 1; + cpu_signal_handler(state); + retval = 1; + break; } - return 0; + + return retval; } void @@ -638,8 +723,14 @@ intel_startCPU( DBG("intel_startCPU(%d) lapic_id=%d\n", slot_num, lapic); DBG("IdlePTD(%p): 0x%x\n", &IdlePTD, (int) IdlePTD); - /* Initialize (or re-initialize) the descriptor tables for this cpu. */ - mp_desc_init(cpu_datap(slot_num), FALSE); + /* + * Initialize (or re-initialize) the descriptor tables for this cpu. + * Propagate processor mode to slave. 
+ */ + if (cpu_mode_is64bit()) + cpu_desc_init64(cpu_datap(slot_num), FALSE); + else + cpu_desc_init(cpu_datap(slot_num), FALSE); /* Serialize use of the slave boot stack. */ mutex_lock(&mp_cpu_boot_lock); @@ -677,13 +768,13 @@ intel_startCPU( mutex_unlock(&mp_cpu_boot_lock); if (!cpu_datap(slot_num)->cpu_running) { - DBG("Failed to start CPU %02d\n", slot_num); + kprintf("Failed to start CPU %02d\n", slot_num); printf("Failed to start CPU %02d, rebooting...\n", slot_num); delay(1000000); cpu_shutdown(); return KERN_SUCCESS; } else { - DBG("Started CPU %02d\n", slot_num); + kprintf("Started cpu %d (lapic id %p)\n", slot_num, lapic); printf("Started CPU %02d\n", slot_num); return KERN_SUCCESS; } @@ -691,7 +782,7 @@ intel_startCPU( extern char slave_boot_base[]; extern char slave_boot_end[]; -extern void pstart(void); +extern void slave_pstart(void); void slave_boot_init(void) @@ -709,7 +800,7 @@ slave_boot_init(void) * The slave boot code is responsible for switching to protected * mode and then jumping to the common startup, _start(). */ - bcopy_phys((addr64_t) kvtophys((vm_offset_t) slave_boot_base), + bcopy_phys(kvtophys((vm_offset_t) slave_boot_base), (addr64_t) MP_BOOT, slave_boot_end-slave_boot_base); @@ -724,9 +815,9 @@ slave_boot_init(void) * common startup entry. 
*/ DBG("writing 0x%x at phys 0x%x\n", - kvtophys((vm_offset_t) &pstart), MP_MACH_START+MP_BOOT); + kvtophys((vm_offset_t) &slave_pstart), MP_MACH_START+MP_BOOT); ml_phys_write_word(MP_MACH_START+MP_BOOT, - kvtophys((vm_offset_t) &pstart)); + (unsigned int)kvtophys((vm_offset_t) &slave_pstart)); /* Flush caches */ __asm__("wbinvd"); @@ -741,7 +832,7 @@ MP_EVENT_NAME_DECL(); #endif /* MP_DEBUG */ void -cpu_signal_handler(__unused struct i386_interrupt_state *regs) +cpu_signal_handler(x86_saved_state_t *regs) { int my_cpu; volatile int *my_word; @@ -757,12 +848,19 @@ cpu_signal_handler(__unused struct i386_interrupt_state *regs) do { #if MACH_KDB && MACH_ASSERT if (i-- <= 0) - Debugger("cpu_signal_handler"); + Debugger("cpu_signal_handler: signals did not clear"); #endif /* MACH_KDB && MACH_ASSERT */ #if MACH_KDP if (i_bit(MP_KDP, my_word)) { DBGLOG(cpu_handle,my_cpu,MP_KDP); i_bit_clear(MP_KDP, my_word); +/* Ensure that the i386_kernel_state at the base of the + * current thread's stack (if any) is synchronized with the + * context at the moment of the interrupt, to facilitate + * access through the debugger. + * XXX 64-bit state? 
+ */ + sync_iss_to_iks(saved_state32(regs)); mp_kdp_wait(); } else #endif /* MACH_KDP */ @@ -776,16 +874,20 @@ cpu_signal_handler(__unused struct i386_interrupt_state *regs) ast_check(cpu_to_processor(my_cpu)); #if MACH_KDB } else if (i_bit(MP_KDB, my_word)) { - extern kdb_is_slave[]; i_bit_clear(MP_KDB, my_word); - kdb_is_slave[my_cpu]++; - kdb_kintr(); + current_cpu_datap()->cpu_kdb_is_slave++; + mp_kdb_wait(); + current_cpu_datap()->cpu_kdb_is_slave--; #endif /* MACH_KDB */ } else if (i_bit(MP_RENDEZVOUS, my_word)) { DBGLOG(cpu_handle,my_cpu,MP_RENDEZVOUS); i_bit_clear(MP_RENDEZVOUS, my_word); mp_rendezvous_action(); + } else if (i_bit(MP_CHUD, my_word)) { + DBGLOG(cpu_handle,my_cpu,MP_CHUD); + i_bit_clear(MP_CHUD, my_word); + chudxnu_cpu_signal_handler(); } } while (*my_word); @@ -793,19 +895,49 @@ cpu_signal_handler(__unused struct i386_interrupt_state *regs) } + +/* We want this to show up in backtraces, so mark it noinline + */ +static int __attribute__((noinline)) +NMIInterruptHandler(void *regs) +{ + boolean_t state = ml_set_interrupts_enabled(FALSE); + sync_iss_to_iks_unconditionally(regs); + mp_kdp_wait(); + (void) ml_set_interrupts_enabled(state); + return 1; +} + #ifdef MP_DEBUG extern int max_lock_loops; #endif /* MP_DEBUG */ + +int trappedalready = 0; /* (BRINGUP */ + void cpu_interrupt(int cpu) { boolean_t state; + + if(cpu_datap(cpu)->cpu_signals & 6) { /* (BRINGUP) */ + kprintf("cpu_interrupt: sending enter debugger signal (%08X) to cpu %d\n", cpu_datap(cpu)->cpu_signals, cpu); + } if (smp_initialized) { +#if MACH_KDB +// if(!trappedalready && (cpu_datap(cpu)->cpu_signals & 6)) { /* (BRINGUP) */ +// if(kdb_cpu != cpu_number()) { +// trappedalready = 1; +// panic("cpu_interrupt: sending enter debugger signal (%08X) to cpu %d and I do not own debugger, owner = %08X\n", +// cpu_datap(cpu)->cpu_signals, cpu, kdb_cpu); +// } +// } +#endif + /* Wait for previous interrupt to be delivered... 
*/ #ifdef MP_DEBUG - int pending_busy_count = 0; + int pending_busy_count = 0; while (LAPIC_REG(ICR) & LAPIC_ICR_DS_PENDING) { if (++pending_busy_count > max_lock_loops) panic("cpus_interrupt() deadlock\n"); @@ -825,18 +957,43 @@ cpu_interrupt(int cpu) } +/* + * Send a true NMI via the local APIC to the specified CPU. + */ +static void +cpu_NMI_interrupt(int cpu) +{ + boolean_t state; + + if (smp_initialized) { + state = ml_set_interrupts_enabled(FALSE); + LAPIC_REG(ICRD) = + cpu_to_lapic[cpu] << LAPIC_ICRD_DEST_SHIFT; +/* The vector is ignored in this case, the other CPU will come in on the + * NMI vector. + */ + LAPIC_REG(ICR) = + LAPIC_VECTOR(INTERPROCESSOR) | LAPIC_ICR_DM_NMI; + (void) ml_set_interrupts_enabled(state); + } + +} + void i386_signal_cpu(int cpu, mp_event_t event, mp_sync_t mode) { volatile int *signals = &cpu_datap(cpu)->cpu_signals; uint64_t tsc_timeout; - + if (!cpu_datap(cpu)->cpu_running) return; - DBGLOG(cpu_signal, cpu, event); + if (event == MP_TLB_FLUSH) + KERNEL_DEBUG(0xef800020 | DBG_FUNC_START, cpu, 0, 0, 0, 0); + DBGLOG(cpu_signal, cpu, event); + i_bit_set(event, signals); cpu_interrupt(cpu); if (mode == SYNC) { @@ -851,6 +1008,8 @@ i386_signal_cpu(int cpu, mp_event_t event, mp_sync_t mode) goto again; } } + if (event == MP_TLB_FLUSH) + KERNEL_DEBUG(0xef800020 | DBG_FUNC_END, cpu, 0, 0, 0, 0); } void @@ -902,14 +1061,19 @@ mp_rendezvous_action(void) mp_rv_setup_func(mp_rv_func_arg); /* spin on entry rendezvous */ atomic_incl(&mp_rv_waiters[0], 1); - while (*((volatile long *) &mp_rv_waiters[0]) < mp_rv_ncpus) + while (mp_rv_waiters[0] < mp_rv_ncpus) { + boolean_t intr = ml_set_interrupts_enabled(FALSE); + /* poll for pesky tlb flushes */ + handle_pending_TLB_flushes(); + ml_set_interrupts_enabled(intr); cpu_pause(); + } /* action function */ if (mp_rv_action_func != NULL) mp_rv_action_func(mp_rv_func_arg); /* spin on exit rendezvous */ atomic_incl(&mp_rv_waiters[1], 1); - while (*((volatile long *) &mp_rv_waiters[1]) < mp_rv_ncpus) + 
while (mp_rv_waiters[1] < mp_rv_ncpus) cpu_pause(); /* teardown function */ if (mp_rv_teardown_func != NULL) @@ -959,9 +1123,61 @@ mp_rendezvous(void (*setup_func)(void *), simple_unlock(&mp_rv_lock); } +void +mp_rendezvous_break_lock(void) +{ + simple_lock_init(&mp_rv_lock, 0); +} + +static void +setup_disable_intrs(__unused void * param_not_used) +{ + /* disable interrupts before the first barrier */ + boolean_t intr = ml_set_interrupts_enabled(FALSE); + + current_cpu_datap()->cpu_iflag = intr; + DBG("CPU%d: %s\n", get_cpu_number(), __FUNCTION__); +} + +static void +teardown_restore_intrs(__unused void * param_not_used) +{ + /* restore interrupt flag following MTRR changes */ + ml_set_interrupts_enabled(current_cpu_datap()->cpu_iflag); + DBG("CPU%d: %s\n", get_cpu_number(), __FUNCTION__); +} + +/* + * A wrapper to mp_rendezvous() to call action_func() with interrupts disabled. + * This is exported for use by kexts. + */ +void +mp_rendezvous_no_intrs( + void (*action_func)(void *), + void *arg) +{ + mp_rendezvous(setup_disable_intrs, + action_func, + teardown_restore_intrs, + arg); +} + +void +handle_pending_TLB_flushes(void) +{ + volatile int *my_word = &current_cpu_datap()->cpu_signals; + + if (i_bit(MP_TLB_FLUSH, my_word)) { + DBGLOG(cpu_handle, cpu_number(), MP_TLB_FLUSH); + i_bit_clear(MP_TLB_FLUSH, my_word); + pmap_update_interrupt(); + } +} + + #if MACH_KDP volatile boolean_t mp_kdp_trap = FALSE; -long mp_kdp_ncpus; +volatile long mp_kdp_ncpus; boolean_t mp_kdp_state; @@ -982,6 +1198,7 @@ mp_kdp_enter(void) */ mp_kdp_state = ml_set_interrupts_enabled(FALSE); simple_lock(&mp_kdp_lock); + while (mp_kdp_trap) { simple_unlock(&mp_kdp_lock); DBG("mp_kdp_enter() race lost\n"); @@ -992,41 +1209,79 @@ mp_kdp_enter(void) mp_kdp_trap = TRUE; simple_unlock(&mp_kdp_lock); - /* Deliver a nudge to other cpus, counting how many */ + /* + * Deliver a nudge to other cpus, counting how many + */ DBG("mp_kdp_enter() signaling other processors\n"); for (ncpus = 1, cpu = 0; cpu < 
real_ncpus; cpu++) { if (cpu == my_cpu || !cpu_datap(cpu)->cpu_running) continue; ncpus++; - i386_signal_cpu(cpu, MP_KDP, ASYNC); + i386_signal_cpu(cpu, MP_KDP, ASYNC); } - - /* Wait other processors to spin. */ + /* + * Wait other processors to synchronize + */ DBG("mp_kdp_enter() waiting for (%d) processors to suspend\n", ncpus); - tsc_timeout = rdtsc64() + (1000*1000*1000); - while (*((volatile unsigned int *) &mp_kdp_ncpus) != ncpus - && rdtsc64() < tsc_timeout) { + + tsc_timeout = rdtsc64() + (ncpus * 100 * 1000 * 1000); + + while (mp_kdp_ncpus != ncpus && rdtsc64() < tsc_timeout) { + /* + * A TLB shootdown request may be pending... this would + * result in the requesting processor waiting in + * PMAP_UPDATE_TLBS() until this processor deals with it. + * Process it, so it can now enter mp_kdp_wait() + */ + handle_pending_TLB_flushes(); cpu_pause(); } +/* If we've timed out, and some processor(s) are still unresponsive, + * interrupt them with an NMI via the local APIC. + */ + if (mp_kdp_ncpus != ncpus) { + for (cpu = 0; cpu < real_ncpus; cpu++) { + if (cpu == my_cpu || !cpu_datap(cpu)->cpu_running) + continue; + if (cpu_signal_pending(cpu, MP_KDP)) + cpu_NMI_interrupt(cpu); + } + } + DBG("mp_kdp_enter() %d processors done %s\n", mp_kdp_ncpus, (mp_kdp_ncpus == ncpus) ? "OK" : "timed out"); + postcode(MP_KDP_ENTER); } +static boolean_t +cpu_signal_pending(int cpu, mp_event_t event) +{ + volatile int *signals = &cpu_datap(cpu)->cpu_signals; + boolean_t retval = FALSE; + + if (i_bit(event, signals)) + retval = TRUE; + return retval; +} + static void mp_kdp_wait(void) { - boolean_t state; - - state = ml_set_interrupts_enabled(TRUE); DBG("mp_kdp_wait()\n"); atomic_incl(&mp_kdp_ncpus, 1); while (mp_kdp_trap) { + /* + * a TLB shootdown request may be pending... this would result in the requesting + * processor waiting in PMAP_UPDATE_TLBS() until this processor deals with it. 
+ * Process it, so it can now enter mp_kdp_wait() + */ + handle_pending_TLB_flushes(); + cpu_pause(); } atomic_decl(&mp_kdp_ncpus, 1); DBG("mp_kdp_wait() done\n"); - (void) ml_set_interrupts_enabled(state); } void @@ -1035,10 +1290,18 @@ mp_kdp_exit(void) DBG("mp_kdp_exit()\n"); atomic_decl(&mp_kdp_ncpus, 1); mp_kdp_trap = FALSE; + __asm__ volatile("mfence"); /* Wait other processors to stop spinning. XXX needs timeout */ DBG("mp_kdp_exit() waiting for processors to resume\n"); - while (*((volatile long *) &mp_kdp_ncpus) > 0) { + while (mp_kdp_ncpus > 0) { + /* + * a TLB shootdown request may be pending... this would result in the requesting + * processor waiting in PMAP_UPDATE_TLBS() until this processor deals with it. + * Process it, so it can now enter mp_kdp_wait() + */ + handle_pending_TLB_flushes(); + cpu_pause(); } DBG("mp_kdp_exit() done\n"); @@ -1065,6 +1328,7 @@ cause_ast_check( } } +#if MACH_KDB /* * invoke kdb on slave processors */ @@ -1074,14 +1338,52 @@ remote_kdb(void) { unsigned int my_cpu = cpu_number(); unsigned int cpu; + int kdb_ncpus; + uint64_t tsc_timeout = 0; - mp_disable_preemption(); - for (cpu = 0; cpu < real_ncpus; cpu++) { + mp_kdb_trap = TRUE; + mp_kdb_ncpus = 1; + for (kdb_ncpus = 1, cpu = 0; cpu < real_ncpus; cpu++) { if (cpu == my_cpu || !cpu_datap(cpu)->cpu_running) continue; - i386_signal_cpu(cpu, MP_KDB, SYNC); + kdb_ncpus++; + i386_signal_cpu(cpu, MP_KDB, ASYNC); } - mp_enable_preemption(); + DBG("remote_kdb() waiting for (%d) processors to suspend\n",kdb_ncpus); + + tsc_timeout = rdtsc64() + (kdb_ncpus * 100 * 1000 * 1000); + + while (mp_kdb_ncpus != kdb_ncpus && rdtsc64() < tsc_timeout) { + /* + * a TLB shootdown request may be pending... this would result in the requesting + * processor waiting in PMAP_UPDATE_TLBS() until this processor deals with it. 
+ * Process it, so it can now enter mp_kdp_wait() + */ + handle_pending_TLB_flushes(); + + cpu_pause(); + } + DBG("mp_kdp_enter() %d processors done %s\n", + mp_kdb_ncpus, (mp_kdb_ncpus == kdb_ncpus) ? "OK" : "timed out"); +} + +static void +mp_kdb_wait(void) +{ + DBG("mp_kdb_wait()\n"); + atomic_incl(&mp_kdb_ncpus, 1); + while (mp_kdb_trap) { + /* + * a TLB shootdown request may be pending... this would result in the requesting + * processor waiting in PMAP_UPDATE_TLBS() until this processor deals with it. + * Process it, so it can now enter mp_kdp_wait() + */ + handle_pending_TLB_flushes(); + + cpu_pause(); + } + atomic_decl(&mp_kdb_ncpus, 1); + DBG("mp_kdb_wait() done\n"); } /* @@ -1096,9 +1398,34 @@ clear_kdb_intr(void) mp_enable_preemption(); } +void +mp_kdb_exit(void) +{ + DBG("mp_kdb_exit()\n"); + atomic_decl(&mp_kdb_ncpus, 1); + mp_kdb_trap = FALSE; + __asm__ volatile("mfence"); + + while (mp_kdb_ncpus > 0) { + /* + * a TLB shootdown request may be pending... this would result in the requesting + * processor waiting in PMAP_UPDATE_TLBS() until this processor deals with it. + * Process it, so it can now enter mp_kdp_wait() + */ + handle_pending_TLB_flushes(); + + cpu_pause(); + } + DBG("mp_kdb_exit() done\n"); +} + +#endif /* MACH_KDB */ + /* * i386_init_slave() is called from pstart. * We're in the cpu's interrupt stack with interrupts disabled. + * At this point we are in legacy mode. We need to switch on IA32e + * if the mode is set to 64-bits. 
*/ void i386_init_slave(void) @@ -1111,16 +1438,29 @@ i386_init_slave(void) DBG("i386_init_slave() CPU%d: phys (%d) active.\n", get_cpu_number(), get_cpu_phys_number()); + assert(!ml_get_interrupts_enabled()); + if (cpu_mode_is64bit()) { + cpu_IA32e_enable(current_cpu_datap()); + cpu_desc_load64(current_cpu_datap()); + fast_syscall_init64(); + } else { + fast_syscall_init(); + } + lapic_init(); LAPIC_DUMP(); LAPIC_CPU_MAP_DUMP(); + init_fpu(); + mtrr_update_cpu(); pat_init(); - cpu_init(); + cpu_thread_init(); + + cpu_init(); /* Sets cpu_running which starter cpu waits for */ slave_main(); @@ -1131,19 +1471,13 @@ void slave_machine_init(void) { /* - * Here in process context. + * Here in process context, but with interrupts disabled. */ DBG("slave_machine_init() CPU%d\n", get_cpu_number()); - init_fpu(); - - cpu_thread_init(); - - pmc_init(); - - cpu_machine_init(); - clock_init(); + + cpu_machine_init(); /* Interrupts enabled hereafter */ } #undef cpu_number() @@ -1198,44 +1532,5 @@ db_trap_hist(void) } #endif /* TRAP_DEBUG */ - -void db_lapic(int cpu); -unsigned int db_remote_read(int cpu, int reg); -void db_ioapic(unsigned int); -void kdb_console(void); - -void -kdb_console(void) -{ -} - -#define BOOLP(a) ((a)?' 
':'!') - -static char *DM[8] = { - "Fixed", - "Lowest Priority", - "Invalid", - "Invalid", - "NMI", - "Reset", - "Invalid", - "ExtINT"}; - -unsigned int -db_remote_read(int cpu, int reg) -{ - return -1; -} - -void -db_lapic(int cpu) -{ -} - -void -db_ioapic(unsigned int ind) -{ -} - #endif /* MACH_KDB */ diff --git a/osfmk/i386/mp.h b/osfmk/i386/mp.h index 6863b946f..6477ca377 100644 --- a/osfmk/i386/mp.h +++ b/osfmk/i386/mp.h @@ -71,6 +71,7 @@ #include #include #include +#include __BEGIN_DECLS @@ -85,12 +86,13 @@ extern void lapic_shutdown(void); extern void lapic_smm_restore(void); extern boolean_t lapic_probe(void); extern void lapic_dump(void); -extern int lapic_interrupt(int interrupt, void *state); +extern int lapic_interrupt(int interrupt, x86_saved_state_t *state); extern void lapic_end_of_interrupt(void); extern int lapic_to_cpu[]; extern int cpu_to_lapic[]; extern int lapic_interrupt_base; extern void lapic_cpu_map(int lapic, int cpu_num); +extern uint32_t ml_get_apicid(uint32_t cpu); extern void lapic_set_timer( boolean_t interrupt, @@ -107,9 +109,50 @@ extern void lapic_get_timer( typedef void (*i386_intr_func_t)(void *); extern void lapic_set_timer_func(i386_intr_func_t func); extern void lapic_set_pmi_func(i386_intr_func_t func); +extern void lapic_set_thermal_func(i386_intr_func_t func); __END_DECLS +/* + * By default, use high vectors to leave vector space for systems + * with multiple I/O APIC's. However some systems that boot with + * local APIC disabled will hang in SMM when vectors greater than + * 0x5F are used. Those systems are not expected to have I/O APIC + * so 16 (0x50 - 0x40) vectors for legacy PIC support is perfect. 
+ */ +#define LAPIC_DEFAULT_INTERRUPT_BASE 0xD0 +#define LAPIC_REDUCED_INTERRUPT_BASE 0x50 +/* + * Specific lapic interrupts are relative to this base + * in priority order from high to low: + */ + +#define LAPIC_PERFCNT_INTERRUPT 0xF +#define LAPIC_TIMER_INTERRUPT 0xE +#define LAPIC_INTERPROCESSOR_INTERRUPT 0xD +#define LAPIC_THERMAL_INTERRUPT 0xC +#define LAPIC_ERROR_INTERRUPT 0xB +#define LAPIC_SPURIOUS_INTERRUPT 0xA +/* The vector field is ignored for NMI interrupts via the LAPIC + * or otherwise, so this is not an offset from the interrupt + * base. + */ +#define LAPIC_NMI_INTERRUPT 0x2 + +#define LAPIC_REG(reg) \ + (*((volatile uint32_t *)(lapic_start + LAPIC_##reg))) +#define LAPIC_REG_OFFSET(reg,off) \ + (*((volatile uint32_t *)(lapic_start + LAPIC_##reg + (off)))) + +#define LAPIC_VECTOR(src) \ + (lapic_interrupt_base + LAPIC_##src##_INTERRUPT) + +#define LAPIC_ISR_IS_SET(base,src) \ + (LAPIC_REG_OFFSET(ISR_BASE,((base+LAPIC_##src##_INTERRUPT)/32)*0x10) & \ + (1 <<((base + LAPIC_##src##_INTERRUPT)%32))) + +extern vm_offset_t lapic_start; + #endif /* ASSEMBLER */ #define CPU_NUMBER(r) \ @@ -140,20 +183,28 @@ extern void console_cpu_free(void *console_buf); extern int kdb_cpu; /* current cpu running kdb */ extern int kdb_debug; -extern int kdb_is_slave[]; extern int kdb_active[]; extern volatile boolean_t mp_kdp_trap; extern void mp_kdp_enter(void); extern void mp_kdp_exit(void); +#if MACH_KDB +extern void mp_kdb_exit(void); +#endif + /* * All cpu rendezvous: */ -extern void mp_rendezvous(void (*setup_func)(void *), - void (*action_func)(void *), - void (*teardown_func)(void *), - void *arg); +extern void mp_rendezvous( + void (*setup_func)(void *), + void (*action_func)(void *), + void (*teardown_func)(void *), + void *arg); +extern void mp_rendezvous_no_intrs( + void (*action_func)(void *), + void *arg); +extern void mp_rendezvous_break_lock(void); __END_DECLS diff --git a/osfmk/i386/mp_desc.c b/osfmk/i386/mp_desc.c index 03cc903d7..938a52f73 100644 --- 
a/osfmk/i386/mp_desc.c +++ b/osfmk/i386/mp_desc.c @@ -55,14 +55,18 @@ #include #include #include +#include #include +#include #include +#include #include #include #include #include #include +#include #include @@ -77,8 +81,8 @@ /* * First cpu`s interrupt stack. */ -extern char intstack[]; /* bottom */ -extern char eintstack[]; /* top */ +extern uint32_t low_intstack[]; /* bottom */ +extern uint32_t low_eintstack[]; /* top */ /* * Per-cpu data area pointers. @@ -92,6 +96,15 @@ decl_simple_lock_data(,cpu_lock); /* protects real_ncpus */ unsigned int real_ncpus = 1; unsigned int max_ncpus = MAX_CPUS; +extern void *hi_remap_text; +#define HI_TEXT(lo_text) \ + (((uint32_t)&lo_text - (uint32_t)&hi_remap_text) + HIGH_MEM_BASE) + +extern void hi_sysenter(void); +extern void hi64_sysenter(void); +extern void hi64_syscall(void); + + /* * Multiprocessor i386/i486 systems use a separate copy of the * GDT, IDT, LDT, and kernel TSS per processor. The first three @@ -107,13 +120,14 @@ unsigned int max_ncpus = MAX_CPUS; struct fake_descriptor ldt_desc_pattern = { (unsigned int) 0, - LDTSZ * sizeof(struct fake_descriptor) - 1, + LDTSZ_MIN * sizeof(struct fake_descriptor) - 1, 0, ACC_P|ACC_PL_K|ACC_LDT }; + struct fake_descriptor tss_desc_pattern = { (unsigned int) 0, - sizeof(struct i386_tss), + sizeof(struct i386_tss) - 1, 0, ACC_P|ACC_PL_K|ACC_TSS }; @@ -125,53 +139,133 @@ struct fake_descriptor cpudata_desc_pattern = { ACC_P|ACC_PL_K|ACC_DATA_W }; +struct fake_descriptor userwindow_desc_pattern = { + (unsigned int) 0, + ((NBPDE * NCOPY_WINDOWS) / PAGE_SIZE) - 1, + SZ_32 | SZ_G, + ACC_P|ACC_PL_U|ACC_DATA_W +}; + +struct fake_descriptor physwindow_desc_pattern = { + (unsigned int) 0, + PAGE_SIZE - 1, + SZ_32, + ACC_P|ACC_PL_K|ACC_DATA_W +}; + +/* + * This is the expanded, 64-bit variant of the kernel LDT descriptor. + * When switching to 64-bit mode this replaces KERNEL_LDT entry + * and the following empty slot. 
This enables the LDT to be referenced + * in the uber-space remapping window on the kernel. + */ +struct fake_descriptor64 kernel_ldt_desc64 = { + FAKE_UBER64(&master_ldt), + LDTSZ_MIN*sizeof(struct fake_descriptor)-1, + 0, + ACC_P|ACC_PL_K|ACC_LDT, + 0 +}; + +/* + * This is the expanded, 64-bit variant of the kernel TSS descriptor. + * It follows the pattern of the KERNEL_LDT. + */ +struct fake_descriptor64 kernel_tss_desc64 = { + FAKE_UBER64(&master_ktss64), + sizeof(struct x86_64_tss)-1, + 0, + ACC_P|ACC_PL_K|ACC_TSS, + 0 +}; + void -mp_desc_init( +cpu_desc_init( cpu_data_t *cdp, boolean_t is_boot_cpu) { - struct mp_desc_table *mpt = cdp->cpu_desc_tablep; - cpu_desc_index_t *cdt = &cdp->cpu_desc_index; + cpu_desc_table_t *cdt = cdp->cpu_desc_tablep; + cpu_desc_index_t *cdi = &cdp->cpu_desc_index; if (is_boot_cpu) { /* * Master CPU uses the tables built at boot time. - * Just set the TSS and GDT pointers. + * Just set the index pointers to the high shared-mapping space. + * Note that the sysenter stack uses empty space above the ktss + * in the HIGH_FIXED_KTSS page. In this case we don't map the + * real master_sstk in low memory.
*/ - cdt->cdi_ktss = &ktss; + cdi->cdi_ktss = (struct i386_tss *) + pmap_index_to_virt(HIGH_FIXED_KTSS) ; + cdi->cdi_sstk = (vm_offset_t) (cdi->cdi_ktss + 1) + + (vm_offset_t) &master_sstk.top - + (vm_offset_t) &master_sstk; #if MACH_KDB - cdt->cdi_dbtss = &dbtss; + cdi->cdi_dbtss = (struct i386_tss *) + pmap_index_to_virt(HIGH_FIXED_DBTSS); #endif /* MACH_KDB */ - cdt->cdi_gdt = gdt; - cdt->cdi_idt = idt; - cdt->cdi_ldt = ldt; + cdi->cdi_gdt = (struct fake_descriptor *) + pmap_index_to_virt(HIGH_FIXED_GDT); + cdi->cdi_idt = (struct fake_descriptor *) + pmap_index_to_virt(HIGH_FIXED_IDT); + cdi->cdi_ldt = (struct fake_descriptor *) + pmap_index_to_virt(HIGH_FIXED_LDT_BEGIN); } else { - cdt->cdi_ktss = &mpt->ktss; - cdt->cdi_gdt = mpt->gdt; - cdt->cdi_idt = mpt->idt; - cdt->cdi_ldt = mpt->ldt; + vm_offset_t cpu_hi_desc; + + cpu_hi_desc = pmap_cpu_high_shared_remap(cdp->cpu_number, + HIGH_CPU_DESC, + (vm_offset_t) cdt, 1); + + /* + * Per-cpu GDT, IDT, LDT, KTSS descriptors are allocated in one + * block (cpu_desc_table) and double-mapped into high shared space + * in one page window. + * Also, a transient stack for the fast sysenter path. The top of + * which is set at context switch time to point to the PCB using + * the high address. + */ + cdi->cdi_gdt = (struct fake_descriptor *) (cpu_hi_desc + + offsetof(cpu_desc_table_t, gdt[0])); + cdi->cdi_idt = (struct fake_descriptor *) (cpu_hi_desc + + offsetof(cpu_desc_table_t, idt[0])); + cdi->cdi_ktss = (struct i386_tss *) (cpu_hi_desc + + offsetof(cpu_desc_table_t, ktss)); + cdi->cdi_sstk = cpu_hi_desc + + offsetof(cpu_desc_table_t, sstk.top); + + /* + * LDT descriptors are mapped into a separate area.
+ */ + cdi->cdi_ldt = (struct fake_descriptor *) + pmap_cpu_high_shared_remap( + cdp->cpu_number, + HIGH_CPU_LDT_BEGIN, + (vm_offset_t) cdp->cpu_ldtp, + HIGH_CPU_LDT_END - HIGH_CPU_LDT_BEGIN + 1); /* * Copy the tables */ - bcopy((char *)idt, - (char *)mpt->idt, - sizeof(idt)); - bcopy((char *)gdt, - (char *)mpt->gdt, - sizeof(gdt)); - bcopy((char *)ldt, - (char *)mpt->ldt, - sizeof(ldt)); - bzero((char *)&mpt->ktss, + bcopy((char *)master_idt, + (char *)cdt->idt, + sizeof(master_idt)); + bcopy((char *)master_gdt, + (char *)cdt->gdt, + sizeof(master_gdt)); + bcopy((char *)master_ldt, + (char *)cdp->cpu_ldtp, + sizeof(master_ldt)); + bzero((char *)&cdt->ktss, sizeof(struct i386_tss)); #if MACH_KDB - cdt->cdi_dbtss = &dbtss; - bcopy((char *)&dbtss, - (char *)&mpt->dbtss, + cdi->cdi_dbtss = (struct i386_tss *) (cpu_hi_desc + + offsetof(cpu_desc_table_t, dbtss)); + bcopy((char *)&master_dbtss, + (char *)&cdt->dbtss, sizeof(struct i386_tss)); #endif /* MACH_KDB */ @@ -179,32 +273,196 @@ mp_desc_init( * Fix up the entries in the GDT to point to * this LDT and this TSS. 
*/ - mpt->gdt[sel_idx(KERNEL_LDT)] = ldt_desc_pattern; - mpt->gdt[sel_idx(KERNEL_LDT)].offset = (vm_offset_t) mpt->ldt; - fix_desc(&mpt->gdt[sel_idx(KERNEL_LDT)], 1); + cdt->gdt[sel_idx(KERNEL_LDT)] = ldt_desc_pattern; + cdt->gdt[sel_idx(KERNEL_LDT)].offset = (vm_offset_t) cdi->cdi_ldt; + fix_desc(&cdt->gdt[sel_idx(KERNEL_LDT)], 1); - mpt->gdt[sel_idx(KERNEL_TSS)] = tss_desc_pattern; - mpt->gdt[sel_idx(KERNEL_TSS)].offset = (vm_offset_t) &mpt->ktss; - fix_desc(&mpt->gdt[sel_idx(KERNEL_TSS)], 1); + cdt->gdt[sel_idx(USER_LDT)] = ldt_desc_pattern; + cdt->gdt[sel_idx(USER_LDT)].offset = (vm_offset_t) cdi->cdi_ldt; + fix_desc(&cdt->gdt[sel_idx(USER_LDT)], 1); - mpt->gdt[sel_idx(CPU_DATA_GS)] = cpudata_desc_pattern; - mpt->gdt[sel_idx(CPU_DATA_GS)].offset = (vm_offset_t) cdp; - fix_desc(&mpt->gdt[sel_idx(CPU_DATA_GS)], 1); + cdt->gdt[sel_idx(KERNEL_TSS)] = tss_desc_pattern; + cdt->gdt[sel_idx(KERNEL_TSS)].offset = (vm_offset_t) cdi->cdi_ktss; + fix_desc(&cdt->gdt[sel_idx(KERNEL_TSS)], 1); + + cdt->gdt[sel_idx(CPU_DATA_GS)] = cpudata_desc_pattern; + cdt->gdt[sel_idx(CPU_DATA_GS)].offset = (vm_offset_t) cdp; + fix_desc(&cdt->gdt[sel_idx(CPU_DATA_GS)], 1); #if MACH_KDB - mpt->gdt[sel_idx(DEBUG_TSS)] = tss_desc_pattern; - mpt->gdt[sel_idx(DEBUG_TSS)].offset = (vm_offset_t) &mpt->dbtss; - fix_desc(&mpt->gdt[sel_idx(DEBUG_TSS)], 1); - - mpt->dbtss.esp0 = (int)(db_task_stack_store + - (INTSTACK_SIZE * (cpu + 1)) - sizeof (natural_t)); - mpt->dbtss.esp = mpt->dbtss.esp0; - mpt->dbtss.eip = (int)&db_task_start; + cdt->gdt[sel_idx(DEBUG_TSS)] = tss_desc_pattern; + cdt->gdt[sel_idx(DEBUG_TSS)].offset = (vm_offset_t) cdi->cdi_dbtss; + fix_desc(&cdt->gdt[sel_idx(DEBUG_TSS)], 1); + + cdt->dbtss.esp0 = (int)(db_task_stack_store + + (INTSTACK_SIZE * (cdp->cpu_number)) - sizeof (natural_t)); + cdt->dbtss.esp = cdt->dbtss.esp0; + cdt->dbtss.eip = (int)&db_task_start; #endif /* MACH_KDB */ - mpt->ktss.ss0 = KERNEL_DS; - mpt->ktss.io_bit_map_offset = 0x0FFF; /* no IO bitmap */ + 
cdt->ktss.ss0 = KERNEL_DS; + cdt->ktss.io_bit_map_offset = 0x0FFF; /* no IO bitmap */ + + cpu_window_init(cdp->cpu_number); + + } + +} + +void +cpu_desc_init64( + cpu_data_t *cdp, + boolean_t is_boot_cpu) +{ + cpu_desc_table64_t *cdt = (cpu_desc_table64_t *) + cdp->cpu_desc_tablep; + cpu_desc_index_t *cdi = &cdp->cpu_desc_index; + + if (is_boot_cpu) { + /* + * Master CPU uses the tables built at boot time. + * Just set the index pointers to the low memory space. + * Note that in 64-bit mode these are addressed in the + * double-mapped window (uber-space). + */ + cdi->cdi_ktss = (struct i386_tss *) &master_ktss64; + cdi->cdi_sstk = (vm_offset_t) &master_sstk.top; + cdi->cdi_gdt = master_gdt; + cdi->cdi_idt = (struct fake_descriptor *) &master_idt64; + cdi->cdi_ldt = (struct fake_descriptor *) &master_ldt; + + /* Replace the expanded LDT and TSS slots in the GDT: */ + *(struct fake_descriptor64 *) &master_gdt[sel_idx(KERNEL_LDT)] = + kernel_ldt_desc64; + *(struct fake_descriptor64 *) &master_gdt[sel_idx(KERNEL_TSS)] = + kernel_tss_desc64; + + /* + * Fix up the expanded descriptors for 64-bit. + */ + fix_desc64((void *) &master_idt64, IDTSZ); + fix_desc64((void *) &master_gdt[sel_idx(KERNEL_LDT)], 1); + fix_desc64((void *) &master_gdt[sel_idx(KERNEL_TSS)], 1); + + /* + * Set the double-fault stack as IST1 in the 64-bit TSS + */ + master_ktss64.ist1 = UBER64(df_task_stack_end); + + } else { + /* + * Per-cpu GDT, IDT, KTSS descriptors are allocated in kernel + * heap (cpu_desc_table) and double-mapped in uber-space (over 4GB). + * LDT descriptors are mapped into a separate area. 
+ */ + cdi->cdi_gdt = cdt->gdt; + cdi->cdi_idt = (struct fake_descriptor *) cdt->idt; + cdi->cdi_ktss = (struct i386_tss *) &cdt->ktss; + cdi->cdi_sstk = (vm_offset_t) &cdt->sstk.top; + cdi->cdi_ldt = cdp->cpu_ldtp; + + /* + * Copy the tables + */ + bcopy((char *)master_idt64, + (char *)cdt->idt, + sizeof(master_idt64)); + bcopy((char *)master_gdt, + (char *)cdt->gdt, + sizeof(master_gdt)); + bcopy((char *)master_ldt, + (char *)cdp->cpu_ldtp, + sizeof(master_ldt)); + bcopy((char *)&master_ktss64, + (char *)&cdt->ktss, + sizeof(struct x86_64_tss)); + + /* + * Fix up the entries in the GDT to point to + * this LDT and this TSS. + */ + kernel_ldt_desc64.offset[0] = (vm_offset_t) cdi->cdi_ldt; + *(struct fake_descriptor64 *) &cdt->gdt[sel_idx(KERNEL_LDT)] = + kernel_ldt_desc64; + fix_desc64(&cdt->gdt[sel_idx(KERNEL_LDT)], 1); + + kernel_ldt_desc64.offset[0] = (vm_offset_t) cdi->cdi_ldt; + *(struct fake_descriptor64 *) &cdt->gdt[sel_idx(USER_LDT)] = + kernel_ldt_desc64; + fix_desc64(&cdt->gdt[sel_idx(USER_LDT)], 1); + + kernel_tss_desc64.offset[0] = (vm_offset_t) cdi->cdi_ktss; + *(struct fake_descriptor64 *) &cdt->gdt[sel_idx(KERNEL_TSS)] = + kernel_tss_desc64; + fix_desc64(&cdt->gdt[sel_idx(KERNEL_TSS)], 1); + + cdt->gdt[sel_idx(CPU_DATA_GS)] = cpudata_desc_pattern; + cdt->gdt[sel_idx(CPU_DATA_GS)].offset = (vm_offset_t) cdp; + fix_desc(&cdt->gdt[sel_idx(CPU_DATA_GS)], 1); + + /* Set double-fault stack as IST1 */ + cdt->ktss.ist1 = UBER64(cdt->dfstk + sizeof(cdt->dfstk)); + + /* + * Allocate copyio windows. + */ + cpu_window_init(cdp->cpu_number); + } + + /* Require that the top of the sysenter stack is 16-byte aligned */ + if ((cdi->cdi_sstk % 16) != 0) + panic("cpu_desc_init64() sysenter stack not 16-byte aligned"); +} + +/* + * Set MSRs for sysenter/sysexit for 64-bit. 
+ */ +void +fast_syscall_init64(void) +{ + wrmsr64(MSR_IA32_SYSENTER_CS, SYSENTER_CS); + wrmsr64(MSR_IA32_SYSENTER_EIP, UBER64(hi64_sysenter)); + wrmsr64(MSR_IA32_SYSENTER_ESP, UBER64(current_sstk())); + + /* Enable syscall/sysret */ + wrmsr64(MSR_IA32_EFER, rdmsr64(MSR_IA32_EFER) | MSR_IA32_EFER_SCE); + + /* + * MSRs for 64-bit syscall/sysret + * Note USER_CS because sysret uses this + 16 when returning to + * 64-bit code. + */ + wrmsr64(MSR_IA32_LSTAR, UBER64(hi64_syscall)); + wrmsr64(MSR_IA32_STAR, (((uint64_t)USER_CS) << 48) | + (((uint64_t)KERNEL64_CS) << 32)); + /* + * Emulate eflags cleared by sysenter but note that + * we also clear the trace trap to avoid the complications + * of single-stepping into a syscall. We also clear + * the nested task bit to avoid a spurious "task switch" + * on IRET. + */ + wrmsr64(MSR_IA32_FMASK, EFL_DF|EFL_IF|EFL_TF|EFL_NT); + + /* + * Set the Kermel GS base MSR to point to per-cpu data in uber-space. + * The uber-space handler (hi64_syscall) uses the swapgs instruction. 
+ */ + wrmsr64(MSR_IA32_KERNEL_GS_BASE, UBER64(current_cpu_datap())); + kprintf("fast_syscall_init64() KERNEL_GS_BASE=0x%016llx\n", + rdmsr64(MSR_IA32_KERNEL_GS_BASE)); +} + +/* + * Set MSRs for sysenter/sysexit + */ +void +fast_syscall_init(void) +{ + wrmsr(MSR_IA32_SYSENTER_CS, SYSENTER_CS, 0); + wrmsr(MSR_IA32_SYSENTER_EIP, HI_TEXT(hi_sysenter), 0); + wrmsr(MSR_IA32_SYSENTER_ESP, current_sstk(), 0); } cpu_data_t * @@ -221,8 +479,10 @@ cpu_data_alloc(boolean_t is_boot_cpu) cdp->cpu_processor = cpu_processor_alloc(TRUE); cdp->cpu_pmap = pmap_cpu_alloc(TRUE); cdp->cpu_this = cdp; - cdp->cpu_int_stack_top = (vm_offset_t) eintstack; - mp_desc_init(cdp, TRUE); + cdp->cpu_is64bit = FALSE; + cdp->cpu_int_stack_top = (vm_offset_t) low_eintstack; + cpu_desc_init(cdp, TRUE); + fast_syscall_init(); } return cdp; } @@ -243,6 +503,9 @@ cpu_data_alloc(boolean_t is_boot_cpu) bzero((void*) cdp, sizeof(cpu_data_t)); cdp->cpu_this = cdp; + /* Propagate mode */ + cdp->cpu_is64bit = cpu_mode_is64bit(); + /* * Allocate interrupt stack: */ @@ -258,15 +521,28 @@ cpu_data_alloc(boolean_t is_boot_cpu) /* * Allocate descriptor table: + * Size depends on cpu mode. */ ret = kmem_alloc(kernel_map, (vm_offset_t *) &cdp->cpu_desc_tablep, - sizeof(struct mp_desc_table)); + cdp->cpu_is64bit ? 
sizeof(cpu_desc_table64_t) + : sizeof(cpu_desc_table_t)); if (ret != KERN_SUCCESS) { printf("cpu_data_alloc() desc_table failed, ret=%d\n", ret); goto abort; } + /* + * Allocate LDT + */ + ret = kmem_alloc(kernel_map, + (vm_offset_t *) &cdp->cpu_ldtp, + sizeof(struct real_descriptor) * LDTSZ); + if (ret != KERN_SUCCESS) { + printf("cpu_data_alloc() ldt failed, ret=%d\n", ret); + goto abort; + } + simple_lock(&cpu_lock); if (real_ncpus >= max_ncpus) { simple_unlock(&cpu_lock); @@ -276,10 +552,11 @@ cpu_data_alloc(boolean_t is_boot_cpu) cdp->cpu_number = real_ncpus; real_ncpus++; simple_unlock(&cpu_lock); - + kprintf("cpu_data_alloc(%d) 0x%x desc_table: 0x%x " + "ldt: 0x%x " "int_stack: 0x%x-0x%x\n", - cdp->cpu_number, cdp, cdp->cpu_desc_tablep, + cdp->cpu_number, cdp, cdp->cpu_desc_tablep, cdp->cpu_ldtp, cdp->cpu_int_stack_top - INTSTACK_SIZE, cdp->cpu_int_stack_top); return cdp; @@ -313,3 +590,121 @@ valid_user_segment_selectors(uint16_t cs, valid_user_data_selector(gs); } + +static vm_offset_t user_window_base = 0; +static vm_offset_t phys_window_base = 0; + +void +cpu_window_init(int cpu) +{ + cpu_data_t *cdp = cpu_data_ptr[cpu]; + cpu_desc_index_t *cdi; + vm_offset_t user_window; + vm_offset_t phys_window; + vm_offset_t vaddr; + int num_cpus; + + num_cpus = ml_get_max_cpus(); + + if (cpu >= num_cpus) + panic("copy_window_init: cpu > num_cpus"); + + if (user_window_base == 0) { + + if (vm_allocate(kernel_map, &vaddr, + (NBPDE * NCOPY_WINDOWS * num_cpus) + NBPDE, + VM_FLAGS_ANYWHERE) != KERN_SUCCESS) + panic("copy_window_init: " + "couldn't allocate user map window"); + + /* + * window must start on a page table boundary + * in the virtual address space + */ + user_window_base = (vaddr + (NBPDE - 1)) & ~(NBPDE - 1); + + /* + * get rid of any allocation leading up to our + * starting boundary + */ + vm_deallocate(kernel_map, vaddr, user_window_base - vaddr); + + /* + * get rid of tail that we don't need + */ + user_window = user_window_base + + (NBPDE * 
NCOPY_WINDOWS * num_cpus); + + vm_deallocate(kernel_map, user_window, + (vaddr + + ((NBPDE * NCOPY_WINDOWS * num_cpus) + NBPDE)) - + user_window); + + if (vm_allocate(kernel_map, &phys_window_base, + PAGE_SIZE * num_cpus, VM_FLAGS_ANYWHERE) + != KERN_SUCCESS) + panic("copy_window_init: " + "couldn't allocate phys map window"); + } + + user_window = user_window_base + (cpu * NCOPY_WINDOWS * NBPDE); + phys_window = phys_window_base + (cpu * PAGE_SIZE); + + cdi = &cdp->cpu_desc_index; + + cdp->cpu_copywindow_base = user_window; + cdp->cpu_copywindow_pdp = pmap_pde(kernel_pmap, user_window); + + cdi->cdi_gdt[sel_idx(USER_WINDOW_SEL)] = userwindow_desc_pattern; + cdi->cdi_gdt[sel_idx(USER_WINDOW_SEL)].offset = user_window; + + fix_desc(&cdi->cdi_gdt[sel_idx(USER_WINDOW_SEL)], 1); + + cdp->cpu_physwindow_base = phys_window; + cdp->cpu_physwindow_ptep = vtopte(phys_window); + + cdi->cdi_gdt[sel_idx(PHYS_WINDOW_SEL)] = physwindow_desc_pattern; + cdi->cdi_gdt[sel_idx(PHYS_WINDOW_SEL)].offset = phys_window; + + fix_desc(&cdi->cdi_gdt[sel_idx(PHYS_WINDOW_SEL)], 1); + +} + + +typedef struct { + uint16_t length; + uint32_t offset[2]; +} __attribute__((__packed__)) table_descriptor64_t; + +extern table_descriptor64_t gdtptr64; +extern table_descriptor64_t idtptr64; +/* + * Load the segment descriptor tables for the current processor. + */ +void +cpu_desc_load64(cpu_data_t *cdp) +{ + cpu_desc_index_t *cdi = &cdp->cpu_desc_index; + + /* + * Load up the new descriptors etc + * ml_load_desc64() expects these global pseudo-descriptors: + * gdtptr64 -> master_gdt + * idtptr64 -> master_idt64 + * These are 10-byte descriptors with 64-bit addresses into + * uber-space. 
+ */ + gdtptr64.length = sizeof(master_gdt) - 1; + gdtptr64.offset[0] = (uint32_t) cdi->cdi_gdt; + gdtptr64.offset[1] = KERNEL_UBER_BASE_HI32; + idtptr64.length = sizeof(master_idt64) - 1; + idtptr64.offset[0] = (uint32_t) cdi->cdi_idt; + idtptr64.offset[1] = KERNEL_UBER_BASE_HI32; + + /* Make sure busy bit is cleared in the TSS */ + gdt_desc_p(KERNEL_TSS)->access &= ~ACC_TSS_BUSY; + + ml_load_desc64(); + + kprintf("64-bit descriptor tables loaded\n"); +} diff --git a/osfmk/i386/mp_desc.h b/osfmk/i386/mp_desc.h index 3a89fad07..c7916f1c9 100644 --- a/osfmk/i386/mp_desc.h +++ b/osfmk/i386/mp_desc.h @@ -77,26 +77,47 @@ __BEGIN_DECLS * Note that dbtss could be conditionalized on MACH_KDB, but * doing so increases misconfiguration risk. */ -struct mp_desc_table { - struct fake_descriptor idt[IDTSZ]; /* IDT */ - struct fake_descriptor gdt[GDTSZ]; /* GDT */ - struct fake_descriptor ldt[LDTSZ]; /* LDT */ - struct i386_tss ktss; - struct i386_tss dbtss; -}; +typedef struct cpu_desc_table { + struct fake_descriptor idt[IDTSZ] __attribute__ ((aligned (16))); + struct fake_descriptor gdt[GDTSZ] __attribute__ ((aligned (16))); + struct i386_tss ktss __attribute__ ((aligned (16))); + struct i386_tss dbtss __attribute__ ((aligned (16))); + struct sysenter_stack sstk; +} cpu_desc_table_t; + +typedef struct cpu_desc_table64 { + struct fake_descriptor64 idt[IDTSZ] __attribute__ ((aligned (16))); + struct fake_descriptor gdt[GDTSZ] __attribute__ ((aligned (16))); + struct x86_64_tss ktss __attribute__ ((aligned (16))); + struct sysenter_stack sstk __attribute__ ((aligned (16))); + uint8_t dfstk[PAGE_SIZE] __attribute__ ((aligned (16))); +} cpu_desc_table64_t; #define current_gdt() (current_cpu_datap()->cpu_desc_index.cdi_gdt) #define current_idt() (current_cpu_datap()->cpu_desc_index.cdi_idt) #define current_ldt() (current_cpu_datap()->cpu_desc_index.cdi_ldt) #define current_ktss() (current_cpu_datap()->cpu_desc_index.cdi_ktss) #define current_dbtss() 
(current_cpu_datap()->cpu_desc_index.cdi_dbtss) +#define current_sstk() (current_cpu_datap()->cpu_desc_index.cdi_sstk) + +#define current_ktss64() ((struct x86_64_tss *) current_ktss()) +#define current_sstk64() ((addr64_t *) current_sstk()) #define gdt_desc_p(sel) \ ((struct real_descriptor *)¤t_gdt()[sel_idx(sel)]) #define ldt_desc_p(sel) \ ((struct real_descriptor *)¤t_ldt()[sel_idx(sel)]) -extern void mp_desc_init(cpu_data_t *cdp, boolean_t is_boot_cpu); +extern void cpu_desc_init( + cpu_data_t *cdp, + boolean_t is_boot_cpu); +extern void cpu_desc_init64( + cpu_data_t *cdp, + boolean_t is_boot_cpu); +extern void cpu_desc_load64( + cpu_data_t *cdp); +extern void fast_syscall_init(void); +extern void fast_syscall_init64(void); static inline boolean_t valid_user_data_selector(uint16_t selector) diff --git a/osfmk/i386/mp_events.h b/osfmk/i386/mp_events.h index 62da36894..aa3b4f2ee 100644 --- a/osfmk/i386/mp_events.h +++ b/osfmk/i386/mp_events.h @@ -36,6 +36,7 @@ typedef enum { MP_RENDEZVOUS, MP_IDLE, MP_UNIDLE, + MP_CHUD, MP_LAST } mp_event_t; @@ -48,6 +49,7 @@ const char *mp_event_name[] = { \ "MP_RENDEZVOUS", \ "MP_IDLE", \ "MP_UNIDLE", \ + "MP_CHUD", \ "MP_LAST" \ } diff --git a/osfmk/i386/mp_slave_boot.s b/osfmk/i386/mp_slave_boot.s index bef238b67..c1a8d8ccf 100644 --- a/osfmk/i386/mp_slave_boot.s +++ b/osfmk/i386/mp_slave_boot.s @@ -61,75 +61,72 @@ .text .align 12 // Page align for single bcopy_phys() -#define LJMP(segment,address) \ - .byte 0xea ;\ - .long address-EXT(slave_boot_base) ;\ +#define operand_size_prefix .byte 0x66 +#define address_size_prefix .byte 0x67 + +#define LJMP(segment,address) \ + operand_size_prefix ;\ + .byte 0xea ;\ + .long address-EXT(slave_boot_base) ;\ .word segment -#define LGDT(address) \ - .word 0x010f ;\ - .byte 0x15 ;\ +#define LGDT(address) \ + address_size_prefix ;\ + operand_size_prefix ;\ + .word 0x010f ;\ + .byte 0x15 ;\ .long address-EXT(slave_boot_base) Entry(slave_boot_base) /* code is loaded at 0x0:0x1000 */ /* ljmp 
to the next instruction to set up %cs */ - data16 - LJMP(MP_BOOTSEG, EXT(slave_pstart)) + LJMP(MP_BOOTSEG, EXT(slave_rstart)) -Entry(slave_pstart) +Entry(slave_rstart) /* set up %ds */ mov %cs, %ax mov %ax, %ds - POSTCODE(SLAVE_PSTART_ENTRY); + POSTCODE(SLAVE_RSTART_ENTRY); /* set up %ss and %esp */ - data16 - mov $(MP_BOOTSEG), %eax + mov %cs, %ax mov %ax, %ss - data16 mov $(MP_BOOTSTACK), %esp /*set up %es */ mov %ax, %es /* change to protected mode */ - data16 - call EXT(real_to_prot) + operand_size_prefix + call EXT(slave_real_to_prot) push MP_MACH_START - call EXT(startprog) + call EXT(slave_startprog) /* - real_to_prot() + slave_real_to_prot() transfer from real mode to protected mode. */ -Entry(real_to_prot) +Entry(slave_real_to_prot) /* guarantee that interrupt is disabled when in prot mode */ cli - POSTCODE(REAL_TO_PROT_ENTRY); + POSTCODE(SLAVE_REAL_TO_PROT_ENTRY); /* load the gdtr */ - addr16 - data16 LGDT(EXT(gdtr)) /* load the gdtr */ /* set the PE bit of CR0 */ mov %cr0, %eax - - data16 or $(CR0_PE_ON), %eax mov %eax, %cr0 /* make intrasegment jump to flush the processor pipeline and */ /* reload CS register */ - data16 LJMP(0x08, xprot) - xprot: /* we are in USE32 mode now */ @@ -139,20 +136,20 @@ xprot: movw %ax, %ss movw %ax, %es - POSTCODE(REAL_TO_PROT_EXIT); + POSTCODE(SLAVE_REAL_TO_PROT_EXIT); ret /* - startprog(phyaddr) + slave_startprog(phyaddr) start the program on protected mode where phyaddr is the entry point */ -Entry(startprog) +Entry(slave_startprog) push %ebp movl %esp, %ebp - POSTCODE(STARTPROG_ENTRY); + POSTCODE(SLAVE_STARTPROG_ENTRY); movl 0x8(%ebp), %ecx /* entry offset */ movl $0x28, %ebx /* segment */ @@ -164,7 +161,7 @@ Entry(startprog) movw %bx, %ds movw %bx, %es - POSTCODE(STARTPROG_EXIT); + POSTCODE(SLAVE_STARTPROG_EXIT); lret @@ -198,7 +195,7 @@ Entry(Gdt) .byte 0,0x9e,0x0,0 .word 0xffff,0 /* 0x20 : init data */ - .byte 0,0x93,0xcf,0 + .byte 0,0x9f,0xcf,0 .word 0xffff,0 /* 0x28 : init code */ .byte 0,0x9f,0xcf,0 diff --git 
a/osfmk/i386/pcb.c b/osfmk/i386/pcb.c index ca1a170f4..6ca7bff6b 100644 --- a/osfmk/i386/pcb.c +++ b/osfmk/i386/pcb.c @@ -90,24 +90,37 @@ #include #include #include - +#include /* * Maps state flavor to number of words in the state: */ -__private_extern__ unsigned int _MachineStateCount[] = { - /* FLAVOR_LIST */ 0, - i386_NEW_THREAD_STATE_COUNT, - i386_FLOAT_STATE_COUNT, - i386_ISA_PORT_MAP_STATE_COUNT, - i386_V86_ASSIST_STATE_COUNT, - i386_REGS_SEGS_STATE_COUNT, - i386_THREAD_SYSCALL_STATE_COUNT, - /* THREAD_STATE_NONE */ 0, - i386_SAVED_STATE_COUNT, + /* FLAVOR_LIST */ + 0, + x86_THREAD_STATE32_COUNT, + x86_FLOAT_STATE32_COUNT, + x86_EXCEPTION_STATE32_COUNT, + x86_THREAD_STATE64_COUNT, + x86_FLOAT_STATE64_COUNT, + x86_EXCEPTION_STATE64_COUNT, + x86_THREAD_STATE_COUNT, + x86_FLOAT_STATE_COUNT, + x86_EXCEPTION_STATE_COUNT, + 0, + x86_SAVED_STATE32_COUNT, + x86_SAVED_STATE64_COUNT, + x86_DEBUG_STATE32_COUNT, + x86_DEBUG_STATE64_COUNT, + x86_DEBUG_STATE_COUNT }; +zone_t iss_zone32; /* zone for 32bit saved_state area */ +zone_t iss_zone64; /* zone for 64bit saved_state area */ +zone_t ids_zone32; /* zone for 32bit debug_state area */ +zone_t ids_zone64; /* zone for 64bit debug_state area */ + + /* Forward */ void act_machine_throughcall(thread_t thr_act); @@ -123,6 +136,279 @@ extern void Thread_continue(void); extern void Load_context( thread_t thread); + +static void +get_exception_state32(thread_t thread, x86_exception_state32_t *es); + +static void +get_exception_state64(thread_t thread, x86_exception_state64_t *es); + +static void +get_thread_state32(thread_t thread, x86_thread_state32_t *ts); + +static void +get_thread_state64(thread_t thread, x86_thread_state64_t *ts); + +static int +set_thread_state32(thread_t thread, x86_thread_state32_t *ts); + +static int +set_thread_state64(thread_t thread, x86_thread_state64_t *ts); + +/* + * Don't let an illegal value for dr7 get set. Specifically, + * check for undefined settings. 
Setting these bit patterns + * result in undefined behaviour and can lead to an unexpected + * TRCTRAP. + */ +static boolean_t +dr7_is_valid(uint32_t *dr7) +{ + int i; + uint32_t mask1, mask2; + + /* + * If the DE bit is set in CR4, R/W0-3 can be pattern + * "10B" to indicate i/o reads and write + */ + if (!(get_cr4() & CR4_DE)) + for (i = 0, mask1 = 0x3<<16, mask2 = 0x2<<16; i < 4; + i++, mask1 <<= 4, mask2 <<= 4) + if ((*dr7 & mask1) == mask2) + return (FALSE); + + /* + * len0-3 pattern "10B" is ok for len on 64-bit. + */ + if (current_cpu_datap()->cpu_is64bit == TRUE) + for (i = 0, mask1 = 0x3<<18, mask2 = 0x2<<18; i < 4; + i++, mask1 <<= 4, mask2 <<= 4) + if ((*dr7 & mask1) == mask2) + return (FALSE); + + /* + * if we are doing an instruction execution break (indicated + * by r/w[x] being "00B"), then the len[x] must also be set + * to "00B" + */ + for (i = 0; i < 4; i++) + if (((((*dr7 >> (16 + i*4))) & 0x3) == 0) && + ((((*dr7 >> (18 + i*4))) & 0x3) != 0)) + return (FALSE); + + /* + * Intel docs have these bits fixed. + */ + *dr7 |= 0x1 << 10; /* set bit 10 to 1 */ + *dr7 &= ~(0x1 << 11); /* set bit 11 to 0 */ + *dr7 &= ~(0x1 << 12); /* set bit 12 to 0 */ + *dr7 &= ~(0x1 << 14); /* set bit 14 to 0 */ + *dr7 &= ~(0x1 << 15); /* set bit 15 to 0 */ + + /* + * We don't allow anything to set the global breakpoints. 
+ */ + + if (*dr7 & 0x2) + return (FALSE); + + if (*dr7 & (0x2<<2)) + return (FALSE); + + if (*dr7 & (0x2<<4)) + return (FALSE); + + if (*dr7 & (0x2<<6)) + return (FALSE); + + return (TRUE); +} + +static inline void +set_live_debug_state32(cpu_data_t *cdp, x86_debug_state32_t *ds) +{ + __asm__ volatile ("movl %0,%%db0" : :"r" (ds->dr0)); + __asm__ volatile ("movl %0,%%db1" : :"r" (ds->dr1)); + __asm__ volatile ("movl %0,%%db2" : :"r" (ds->dr2)); + __asm__ volatile ("movl %0,%%db3" : :"r" (ds->dr3)); + if (cpu_mode_is64bit()) + cdp->cpu_dr7 = ds->dr7; +} + +extern void set_64bit_debug_regs(x86_debug_state64_t *ds); + +static inline void +set_live_debug_state64(cpu_data_t *cdp, x86_debug_state64_t *ds) +{ + /* + * We need to enter 64-bit mode in order to set the full + * width of these registers + */ + set_64bit_debug_regs(ds); + cdp->cpu_dr7 = ds->dr7; +} + +static kern_return_t +set_debug_state32(thread_t thread, x86_debug_state32_t *ds) +{ + x86_debug_state32_t *ids; + pcb_t pcb; + + pcb = thread->machine.pcb; + ids = pcb->ids; + + if (ids == NULL) { + ids = zalloc(ids_zone32); + bzero(ids, sizeof *ids); + + simple_lock(&pcb->lock); + /* make sure it wasn't already alloc()'d elsewhere */ + if (pcb->ids == NULL) { + pcb->ids = ids; + simple_unlock(&pcb->lock); + } else { + simple_unlock(&pcb->lock); + zfree(ids_zone32, ids); + } + } + + if (!dr7_is_valid(&ds->dr7)) + goto err; + + /* + * Only allow local breakpoints and make sure they are not + * in the trampoline code. 
+ */ + + if (ds->dr7 & 0x1) + if (ds->dr0 >= (unsigned long)HIGH_MEM_BASE) + goto err; + + if (ds->dr7 & (0x1<<2)) + if (ds->dr1 >= (unsigned long)HIGH_MEM_BASE) + goto err; + + if (ds->dr7 & (0x1<<4)) + if (ds->dr2 >= (unsigned long)HIGH_MEM_BASE) + goto err; + + if (ds->dr7 & (0x1<<6)) + if (ds->dr3 >= (unsigned long)HIGH_MEM_BASE) + goto err; + + ids->dr0 = ds->dr0; + ids->dr1 = ds->dr1; + ids->dr2 = ds->dr2; + ids->dr3 = ds->dr3; + ids->dr6 = ds->dr6; + ids->dr7 = ds->dr7; + + return (KERN_SUCCESS); + +err: + return (KERN_INVALID_ARGUMENT); +} + +static kern_return_t +set_debug_state64(thread_t thread, x86_debug_state64_t *ds) +{ + x86_debug_state64_t *ids; + pcb_t pcb; + + pcb = thread->machine.pcb; + ids = pcb->ids; + + if (ids == NULL) { + ids = zalloc(ids_zone64); + bzero(ids, sizeof *ids); + + simple_lock(&pcb->lock); + /* make sure it wasn't already alloc()'d elsewhere */ + if (pcb->ids == NULL) { + pcb->ids = ids; + simple_unlock(&pcb->lock); + } else { + simple_unlock(&pcb->lock); + zfree(ids_zone64, ids); + } + } + + if (!dr7_is_valid((uint32_t *)&ds->dr7)) + goto err; + + /* + * Don't allow the user to set debug addresses above their max + * value + */ + if (ds->dr7 & 0x1) + if (ds->dr0 >= VM_MAX_PAGE_ADDRESS) + goto err; + + if (ds->dr7 & (0x1<<2)) + if (ds->dr1 >= VM_MAX_PAGE_ADDRESS) + goto err; + + if (ds->dr7 & (0x1<<4)) + if (ds->dr2 >= VM_MAX_PAGE_ADDRESS) + goto err; + + if (ds->dr7 & (0x1<<6)) + if (ds->dr3 >= VM_MAX_PAGE_ADDRESS) + goto err; + + ids->dr0 = ds->dr0; + ids->dr1 = ds->dr1; + ids->dr2 = ds->dr2; + ids->dr3 = ds->dr3; + ids->dr6 = ds->dr6; + ids->dr7 = ds->dr7; + + return (KERN_SUCCESS); + +err: + return (KERN_INVALID_ARGUMENT); +} + +static void +get_debug_state32(thread_t thread, x86_debug_state32_t *ds) +{ + x86_debug_state32_t *saved_state; + + saved_state = thread->machine.pcb->ids; + + if (saved_state) { + ds->dr0 = saved_state->dr0; + ds->dr1 = saved_state->dr1; + ds->dr2 = saved_state->dr2; + ds->dr3 = saved_state->dr3; + 
ds->dr4 = saved_state->dr4; + ds->dr5 = saved_state->dr5; + ds->dr6 = saved_state->dr6; + ds->dr7 = saved_state->dr7; + } else + bzero(ds, sizeof *ds); +} + +static void +get_debug_state64(thread_t thread, x86_debug_state64_t *ds) +{ + x86_debug_state64_t *saved_state; + + saved_state = (x86_debug_state64_t *)thread->machine.pcb->ids; + + if (saved_state) { + ds->dr0 = saved_state->dr0; + ds->dr1 = saved_state->dr1; + ds->dr2 = saved_state->dr2; + ds->dr3 = saved_state->dr3; + ds->dr4 = saved_state->dr4; + ds->dr5 = saved_state->dr5; + ds->dr6 = saved_state->dr6; + ds->dr7 = saved_state->dr7; + } else + bzero(ds, sizeof *ds); +} + /* * consider_machine_collect: * @@ -139,92 +425,151 @@ consider_machine_adjust(void) } -// DEBUG -int DEBUG_kldt = 0; -int DEBUG_uldt = 0; static void act_machine_switch_pcb( thread_t new ) { - pcb_t pcb = new->machine.pcb; - int mycpu; - register iopb_tss_t tss = pcb->ims.io_tss; + pcb_t pcb = new->machine.pcb; + struct real_descriptor *ldtp; vm_offset_t pcb_stack_top; - register user_ldt_t uldt = pcb->ims.ldt; + vm_offset_t hi_pcb_stack_top; + vm_offset_t hi_iss; + cpu_data_t *cdp = current_cpu_datap(); - assert(new->kernel_stack != 0); - STACK_IEL(new->kernel_stack)->saved_state = - &new->machine.pcb->iss; + assert(new->kernel_stack != 0); + STACK_IEL(new->kernel_stack)->saved_state = pcb->iss; - /* - * Save a pointer to the top of the "kernel" stack - - * actually the place in the PCB where a trap into - * kernel mode will push the registers. - * The location depends on V8086 mode. If we are - * not in V8086 mode, then a trap into the kernel - * won`t save the v86 segments, so we leave room. - */ + if (!cpu_mode_is64bit()) { + x86_saved_state32_tagged_t *hi_iss32; - pcb_stack_top = (pcb->iss.efl & EFL_VM) - ? (int) (&pcb->iss + 1) - : (int) (&pcb->iss.v86_segs); - - mp_disable_preemption(); - mycpu = cpu_number(); - - if (tss == 0) { - /* - * No per-thread IO permissions. - * Use standard kernel TSS. 
- */ - if (!(gdt_desc_p(KERNEL_TSS)->access & ACC_TSS_BUSY)) - set_tr(KERNEL_TSS); - current_ktss()->esp0 = pcb_stack_top; - } - else { - /* - * Set the IO permissions. Use this thread`s TSS. - */ - *gdt_desc_p(USER_TSS) - = *(struct real_descriptor *)tss->iopb_desc; - tss->tss.esp0 = pcb_stack_top; - set_tr(USER_TSS); - gdt_desc_p(KERNEL_TSS)->access &= ~ ACC_TSS_BUSY; - } + /* + * Save a pointer to the top of the "kernel" stack - + * actually the place in the PCB where a trap into + * kernel mode will push the registers. + */ + hi_iss = (vm_offset_t)((unsigned long) + pmap_cpu_high_map_vaddr(cpu_number(), HIGH_CPU_ISS0) | + ((unsigned long)pcb->iss & PAGE_MASK)); + + cdp->cpu_hi_iss = (void *)hi_iss; - /* - * Set the thread`s LDT or LDT entry. - */ - if (uldt == 0) { - struct real_descriptor *ldtp; - /* - * Use system LDT. - */ - // Set up the tasks specific ldt entries if extant - ldtp = (struct real_descriptor *)current_ldt(); - ldtp[sel_idx(USER_CTHREAD)] = pcb->cthread_desc; - if (pcb->uldt_selector != 0) - ldtp[sel_idx(pcb->uldt_selector)] = pcb->uldt_desc; - set_ldt(KERNEL_LDT); - } - else { - /* - * Thread has its own LDT. // THIS SHOULD BE REMOVED!!!! - */ - *gdt_desc_p(USER_LDT) = uldt->desc; - set_ldt(USER_LDT); - /*debug*/ - if ((DEBUG_uldt++ % 0x7fff) == 0) - printf("KERNEL----> setting user ldt"); + pmap_high_map(pcb->iss_pte0, HIGH_CPU_ISS0); + pmap_high_map(pcb->iss_pte1, HIGH_CPU_ISS1); + + hi_iss32 = (x86_saved_state32_tagged_t *) hi_iss; + assert(hi_iss32->tag == x86_SAVED_STATE32); + + hi_pcb_stack_top = (int) (hi_iss32 + 1); + + /* + * For fast syscall, top of interrupt stack points to pcb stack + */ + *(vm_offset_t *) current_sstk() = hi_pcb_stack_top; + + current_ktss()->esp0 = hi_pcb_stack_top; +/* XXX: This check is performed against the thread save state flavor rather than the + * task's 64-bit feature flag because of the thread/task 64-bit state divergence + * that can arise in task_set_64bit() on x86. 
When that is addressed, we can + * revert to checking the task 64 bit feature flag. The assert below is retained + * for that reason. + */ + } else if (is_saved_state64(pcb->iss)) { + x86_saved_state64_tagged_t *iss64; + vm_offset_t isf; + + assert(is_saved_state64(pcb->iss)); + + iss64 = (x86_saved_state64_tagged_t *) pcb->iss; + /* + * Set pointer to PCB's interrupt stack frame in cpu data. + * Used by syscall and double-fault trap handlers. + */ + isf = (vm_offset_t) &iss64->state.isf; + cdp->cpu_uber.cu_isf = UBER64(isf); + pcb_stack_top = (vm_offset_t) (iss64 + 1); + /* require 16-byte alignment */ + assert((pcb_stack_top & 0xF) == 0); + /* Interrupt stack is pcb */ + current_ktss64()->rsp0 = UBER64(pcb_stack_top); + + /* + * Top of temporary sysenter stack points to pcb stack. + * Although this is not normally used by 64-bit users, + * it needs to be set in case a sysenter is attempted. + */ + *current_sstk64() = UBER64(pcb_stack_top); + + cdp->cpu_task_map = new->map->pmap->pm_kernel_cr3 ? + TASK_MAP_64BIT_SHARED : TASK_MAP_64BIT; + + /* + * Enable the 64-bit user code segment, USER64_CS. + */ + ldt_desc_p(USER64_CS)->access |= ACC_PL_U; + + } else { + x86_saved_state_compat32_t *iss32compat; + vm_offset_t isf; + + assert(is_saved_state32(pcb->iss)); + iss32compat = (x86_saved_state_compat32_t *) pcb->iss; + + pcb_stack_top = (int) (iss32compat + 1); + /* require 16-byte alignment */ + assert((pcb_stack_top & 0xF) == 0); + + /* + * Set pointer to PCB's interrupt stack frame in cpu data. + * Used by debug trap handler. 
+ */ + isf = (vm_offset_t) &iss32compat->isf64; + cdp->cpu_uber.cu_isf = UBER64(isf); + + /* Top of temporary sysenter stack points to pcb stack */ + *current_sstk64() = UBER64(pcb_stack_top); + + /* Interrupt stack is pcb */ + current_ktss64()->rsp0 = UBER64(pcb_stack_top); + + cdp->cpu_task_map = TASK_MAP_32BIT; + + /* + * Disable USER64_CS + */ + ldt_desc_p(USER64_CS)->access &= ~ACC_PL_U; } - mp_enable_preemption(); /* - * Load the floating-point context, if necessary. + * Set the thread`s cthread (a.k.a pthread) + * For 32-bit user this involves setting the USER_CTHREAD + * descriptor in the LDT to point to the cthread data. + * The involves copying in the pre-initialized descriptor. + */ + ldtp = (struct real_descriptor *)current_ldt(); + ldtp[sel_idx(USER_CTHREAD)] = pcb->cthread_desc; + if (pcb->uldt_selector != 0) + ldtp[sel_idx(pcb->uldt_selector)] = pcb->uldt_desc; + /* + * For 64-bit, we additionally set the 64-bit User GS base + * address. On return to 64-bit user, the GS.Base MSR will be written. */ - fpu_load_context(pcb); + cdp->cpu_uber.cu_user_gs_base = pcb->cthread_self; + /* + * Set the thread`s LDT or LDT entry. + */ + if (new->task == TASK_NULL || new->task->i386_ldt == 0) { + /* + * Use system LDT. + */ + ml_cpu_set_ldt(KERNEL_LDT); + } else { + /* + * Task has its own LDT. + */ + user_ldt_set(new); + } } /* @@ -234,6 +579,7 @@ void machine_load_context( thread_t new) { + new->machine.specFlags |= OnProc; act_machine_switch_pcb(new); Load_context(new); } @@ -258,23 +604,22 @@ machine_switch_context( */ fpu_save_context(old); + old->machine.specFlags &= ~OnProc; + new->machine.specFlags |= OnProc; + /* * Switch address maps if need be, even if not switching tasks. * (A server activation may be "borrowing" a client map.) 
*/ - { - int mycpu = cpu_number(); - - PMAP_SWITCH_CONTEXT(old, new, mycpu) - } + PMAP_SWITCH_CONTEXT(old, new, cpu_number()) /* * Load the rest of the user state for the new thread */ act_machine_switch_pcb(new); - KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED,MACH_SCHED) | DBG_FUNC_NONE, - (int)old, (int)new, old->sched_pri, new->sched_pri, 0); - old->continuation = NULL; + KERNEL_DEBUG_CONSTANT( + MACHDBG_CODE(DBG_MACH_SCHED,MACH_SCHED) | DBG_FUNC_NONE, + (int)old, (int)new, old->sched_pri, new->sched_pri, 0); return(Switch_context(old, continuation, new)); } @@ -297,12 +642,209 @@ kern_return_t machine_thread_state_initialize( thread_t thread) { -#pragma unused (thread) + /* + * If there's an fpu save area, free it. + * The initialized state will then be lazily faulted-in, if required. + * And if we're target, re-arm the no-fpu trap. + */ + if (thread->machine.pcb->ifps) { + (void) fpu_set_fxstate(thread, NULL); - return KERN_SUCCESS; + if (thread == current_thread()) + clear_fpu(); + } + return KERN_SUCCESS; } - - + +uint32_t +get_eflags_exportmask(void) +{ + return EFL_USER_SET; +} + +/* + * x86_SAVED_STATE32 - internal save/restore general register state on 32/64 bit processors + * for 32bit tasks only + * x86_SAVED_STATE64 - internal save/restore general register state on 64 bit processors + * for 64bit tasks only + * x86_THREAD_STATE32 - external set/get general register state on 32/64 bit processors + * for 32bit tasks only + * x86_THREAD_STATE64 - external set/get general register state on 64 bit processors + * for 64bit tasks only + * x86_SAVED_STATE - external set/get general register state on 32/64 bit processors + * for either 32bit or 64bit tasks + * x86_FLOAT_STATE32 - internal/external save/restore float and xmm state on 32/64 bit processors + * for 32bit tasks only + * x86_FLOAT_STATE64 - internal/external save/restore float and xmm state on 64 bit processors + * for 64bit tasks only + * x86_FLOAT_STATE - external save/restore float and xmm 
state on 32/64 bit processors + * for either 32bit or 64bit tasks + * x86_EXCEPTION_STATE32 - external get exception state on 32/64 bit processors + * for 32bit tasks only + * x86_EXCEPTION_STATE64 - external get exception state on 64 bit processors + * for 64bit tasks only + * x86_EXCEPTION_STATE - external get exception state on 323/64 bit processors + * for either 32bit or 64bit tasks + */ + + +static void +get_exception_state64(thread_t thread, x86_exception_state64_t *es) +{ + x86_saved_state64_t *saved_state; + + saved_state = USER_REGS64(thread); + + es->trapno = saved_state->isf.trapno; + es->err = saved_state->isf.err; + es->faultvaddr = saved_state->cr2; +} + +static void +get_exception_state32(thread_t thread, x86_exception_state32_t *es) +{ + x86_saved_state32_t *saved_state; + + saved_state = USER_REGS32(thread); + + es->trapno = saved_state->trapno; + es->err = saved_state->err; + es->faultvaddr = saved_state->cr2; +} + + +static int +set_thread_state32(thread_t thread, x86_thread_state32_t *ts) +{ + x86_saved_state32_t *saved_state; + + saved_state = USER_REGS32(thread); + + saved_state->eax = ts->eax; + saved_state->ebx = ts->ebx; + saved_state->ecx = ts->ecx; + saved_state->edx = ts->edx; + saved_state->edi = ts->edi; + saved_state->esi = ts->esi; + saved_state->ebp = ts->ebp; + saved_state->uesp = ts->esp; + saved_state->efl = (ts->eflags & ~EFL_USER_CLEAR) | EFL_USER_SET; + saved_state->eip = ts->eip; + saved_state->cs = ts->cs ? ts->cs : USER_CS; + saved_state->ss = ts->ss ? ts->ss : USER_DS; + saved_state->ds = ts->ds ? ts->ds : USER_DS; + saved_state->es = ts->es ? 
ts->es : USER_DS; + saved_state->fs = ts->fs; + saved_state->gs = ts->gs; + + /* + * If the trace trap bit is being set, + * ensure that the user returns via iret + * - which is signaled thusly: + */ + if ((saved_state->efl & EFL_TF) && saved_state->cs == SYSENTER_CS) + saved_state->cs = SYSENTER_TF_CS; + + return(KERN_SUCCESS); +} + +static int +set_thread_state64(thread_t thread, x86_thread_state64_t *ts) +{ + x86_saved_state64_t *saved_state; + + saved_state = USER_REGS64(thread); + + if (!IS_USERADDR64_CANONICAL(ts->rsp) || + !IS_USERADDR64_CANONICAL(ts->rip)) + return(KERN_INVALID_ARGUMENT); + + saved_state->r8 = ts->r8; + saved_state->r9 = ts->r9; + saved_state->r10 = ts->r10; + saved_state->r11 = ts->r11; + saved_state->r12 = ts->r12; + saved_state->r13 = ts->r13; + saved_state->r14 = ts->r14; + saved_state->r15 = ts->r15; + saved_state->rax = ts->rax; + saved_state->rax = ts->rax; + saved_state->rbx = ts->rbx; + saved_state->rcx = ts->rcx; + saved_state->rdx = ts->rdx; + saved_state->rdi = ts->rdi; + saved_state->rsi = ts->rsi; + saved_state->rbp = ts->rbp; + saved_state->isf.rsp = ts->rsp; + saved_state->isf.rflags = (ts->rflags & ~EFL_USER_CLEAR) | EFL_USER_SET; + saved_state->isf.rip = ts->rip; + saved_state->isf.cs = USER64_CS; + saved_state->fs = ts->fs; + saved_state->gs = ts->gs; + + return(KERN_SUCCESS); +} + + + +static void +get_thread_state32(thread_t thread, x86_thread_state32_t *ts) +{ + x86_saved_state32_t *saved_state; + + saved_state = USER_REGS32(thread); + + ts->eax = saved_state->eax; + ts->ebx = saved_state->ebx; + ts->ecx = saved_state->ecx; + ts->edx = saved_state->edx; + ts->edi = saved_state->edi; + ts->esi = saved_state->esi; + ts->ebp = saved_state->ebp; + ts->esp = saved_state->uesp; + ts->eflags = saved_state->efl; + ts->eip = saved_state->eip; + ts->cs = saved_state->cs; + ts->ss = saved_state->ss; + ts->ds = saved_state->ds; + ts->es = saved_state->es; + ts->fs = saved_state->fs; + ts->gs = saved_state->gs; +} + + +static void 
+get_thread_state64(thread_t thread, x86_thread_state64_t *ts) +{ + x86_saved_state64_t *saved_state; + + saved_state = USER_REGS64(thread); + + ts->r8 = saved_state->r8; + ts->r9 = saved_state->r9; + ts->r10 = saved_state->r10; + ts->r11 = saved_state->r11; + ts->r12 = saved_state->r12; + ts->r13 = saved_state->r13; + ts->r14 = saved_state->r14; + ts->r15 = saved_state->r15; + ts->rax = saved_state->rax; + ts->rbx = saved_state->rbx; + ts->rcx = saved_state->rcx; + ts->rdx = saved_state->rdx; + ts->rdi = saved_state->rdi; + ts->rsi = saved_state->rsi; + ts->rbp = saved_state->rbp; + ts->rsp = saved_state->isf.rsp; + ts->rflags = saved_state->isf.rflags; + ts->rip = saved_state->isf.rip; + ts->cs = saved_state->isf.cs; + ts->fs = saved_state->fs; + ts->gs = saved_state->gs; +} + + + /* * act_machine_set_state: * @@ -316,48 +858,29 @@ machine_thread_set_state( thread_state_t tstate, mach_msg_type_number_t count) { - int kernel_act = 0; - - switch (flavor) { - case THREAD_SYSCALL_STATE: - { - register struct thread_syscall_state *state; - register struct i386_saved_state *saved_state = USER_REGS(thr_act); - state = (struct thread_syscall_state *) tstate; - saved_state->eax = state->eax; - saved_state->edx = state->edx; - if (kernel_act) - saved_state->efl = state->efl; - else - saved_state->efl = (state->efl & ~EFL_USER_CLEAR) | EFL_USER_SET; - saved_state->eip = state->eip; - saved_state->uesp = state->esp; - break; - } - - case i386_SAVED_STATE: + switch (flavor) + { + case x86_SAVED_STATE32: { - register struct i386_saved_state *state; - register struct i386_saved_state *saved_state; + x86_saved_state32_t *state; + x86_saved_state32_t *saved_state; - if (count < i386_SAVED_STATE_COUNT) { - return(KERN_INVALID_ARGUMENT); - } + if (count < x86_SAVED_STATE32_COUNT) + return(KERN_INVALID_ARGUMENT); - state = (struct i386_saved_state *) tstate; + state = (x86_saved_state32_t *) tstate; /* Check segment selectors are safe */ - if (!kernel_act && - 
!valid_user_segment_selectors(state->cs, + if (!valid_user_segment_selectors(state->cs, state->ss, state->ds, state->es, state->fs, state->gs)) - return KERN_INVALID_ARGUMENT; + return KERN_INVALID_ARGUMENT; - saved_state = USER_REGS(thr_act); + saved_state = USER_REGS32(thr_act); /* * General registers @@ -371,271 +894,244 @@ machine_thread_set_state( saved_state->ecx = state->ecx; saved_state->eax = state->eax; saved_state->eip = state->eip; - if (kernel_act) - saved_state->efl = state->efl; - else - saved_state->efl = (state->efl & ~EFL_USER_CLEAR) - | EFL_USER_SET; + + saved_state->efl = (state->efl & ~EFL_USER_CLEAR) | EFL_USER_SET; /* - * Segment registers. Set differently in V8086 mode. + * If the trace trap bit is being set, + * ensure that the user returns via iret + * - which is signaled thusly: */ - if (state->efl & EFL_VM) { - /* - * Set V8086 mode segment registers. - */ - saved_state->cs = state->cs & 0xffff; - saved_state->ss = state->ss & 0xffff; - saved_state->v86_segs.v86_ds = state->ds & 0xffff; - saved_state->v86_segs.v86_es = state->es & 0xffff; - saved_state->v86_segs.v86_fs = state->fs & 0xffff; - saved_state->v86_segs.v86_gs = state->gs & 0xffff; - - /* - * Zero protected mode segment registers. - */ - saved_state->ds = 0; - saved_state->es = 0; - saved_state->fs = 0; - saved_state->gs = 0; - - if (thr_act->machine.pcb->ims.v86s.int_table) { - /* - * Hardware assist on. - */ - thr_act->machine.pcb->ims.v86s.flags = - state->efl & (EFL_TF | EFL_IF); - } - } - else if (kernel_act) { - /* - * 386 mode. Set segment registers for flat - * 32-bit address space. - */ - saved_state->cs = KERNEL_CS; - saved_state->ss = KERNEL_DS; - saved_state->ds = KERNEL_DS; - saved_state->es = KERNEL_DS; - saved_state->fs = KERNEL_DS; - saved_state->gs = CPU_DATA_GS; - } - else { - /* - * User setting segment registers. - * Code and stack selectors have already been - * checked. Others will be reset by 'iret' - * if they are not valid. 
- */ - saved_state->cs = state->cs; - saved_state->ss = state->ss; - saved_state->ds = state->ds; - saved_state->es = state->es; - saved_state->fs = state->fs; - saved_state->gs = state->gs; - } + if ((saved_state->efl & EFL_TF) && state->cs == SYSENTER_CS) + state->cs = SYSENTER_TF_CS; + + /* + * User setting segment registers. + * Code and stack selectors have already been + * checked. Others will be reset by 'iret' + * if they are not valid. + */ + saved_state->cs = state->cs; + saved_state->ss = state->ss; + saved_state->ds = state->ds; + saved_state->es = state->es; + saved_state->fs = state->fs; + saved_state->gs = state->gs; break; } - case i386_NEW_THREAD_STATE: - case i386_REGS_SEGS_STATE: + case x86_SAVED_STATE64: { - register struct i386_new_thread_state *state; - register struct i386_saved_state *saved_state; + x86_saved_state64_t *state; + x86_saved_state64_t *saved_state; - if (count < i386_NEW_THREAD_STATE_COUNT) { - return(KERN_INVALID_ARGUMENT); - } + if (count < x86_SAVED_STATE64_COUNT) + return(KERN_INVALID_ARGUMENT); - state = (struct i386_new_thread_state *) tstate; - - if (flavor == i386_REGS_SEGS_STATE) { - /* - * Code and stack selectors must not be null, - * and must have user protection levels. - * Only the low 16 bits are valid. - */ - state->cs &= 0xffff; - state->ss &= 0xffff; - state->ds &= 0xffff; - state->es &= 0xffff; - state->fs &= 0xffff; - state->gs &= 0xffff; - - if (!kernel_act && - !valid_user_segment_selectors(state->cs, - state->ss, - state->ds, - state->es, - state->fs, - state->gs)) + state = (x86_saved_state64_t *) tstate; + + /* Check segment selectors are safe XXX gs/fs? 
*/ + if (!valid_user_code_selector(state->isf.cs) || + !valid_user_data_selector(state->fs) || + !valid_user_data_selector(state->gs)) + return KERN_INVALID_ARGUMENT; + + /* Check pc and stack are canonical addresses */ + if (!IS_USERADDR64_CANONICAL(state->isf.rsp) || + !IS_USERADDR64_CANONICAL(state->isf.rip)) return KERN_INVALID_ARGUMENT; - } - saved_state = USER_REGS(thr_act); + saved_state = USER_REGS64(thr_act); /* * General registers */ - saved_state->edi = state->edi; - saved_state->esi = state->esi; - saved_state->ebp = state->ebp; - saved_state->uesp = state->uesp; - saved_state->ebx = state->ebx; - saved_state->edx = state->edx; - saved_state->ecx = state->ecx; - saved_state->eax = state->eax; - saved_state->eip = state->eip; - if (kernel_act) - saved_state->efl = state->efl; - else - saved_state->efl = (state->efl & ~EFL_USER_CLEAR) - | EFL_USER_SET; - - /* - * Segment registers. Set differently in V8086 mode. + saved_state->r8 = state->r8; + saved_state->r9 = state->r9; + saved_state->r10 = state->r10; + saved_state->r11 = state->r11; + saved_state->r12 = state->r12; + saved_state->r13 = state->r13; + saved_state->r14 = state->r14; + saved_state->r15 = state->r15; + saved_state->rdi = state->rdi; + saved_state->rsi = state->rsi; + saved_state->rbp = state->rbp; + saved_state->rbx = state->rbx; + saved_state->rdx = state->rdx; + saved_state->rcx = state->rcx; + saved_state->rax = state->rax; + saved_state->isf.rsp = state->isf.rsp; + saved_state->isf.rip = state->isf.rip; + + saved_state->isf.rflags = (state->isf.rflags & ~EFL_USER_CLEAR) | EFL_USER_SET; + + /* + * User setting segment registers. + * Code and stack selectors have already been + * checked. Others will be reset by 'sys' + * if they are not valid. */ - if (state->efl & EFL_VM) { - /* - * Set V8086 mode segment registers. 
- */ - saved_state->cs = state->cs & 0xffff; - saved_state->ss = state->ss & 0xffff; - saved_state->v86_segs.v86_ds = state->ds & 0xffff; - saved_state->v86_segs.v86_es = state->es & 0xffff; - saved_state->v86_segs.v86_fs = state->fs & 0xffff; - saved_state->v86_segs.v86_gs = state->gs & 0xffff; - - /* - * Zero protected mode segment registers. - */ - saved_state->ds = 0; - saved_state->es = 0; - saved_state->fs = 0; - saved_state->gs = 0; - - if (thr_act->machine.pcb->ims.v86s.int_table) { - /* - * Hardware assist on. - */ - thr_act->machine.pcb->ims.v86s.flags = - state->efl & (EFL_TF | EFL_IF); - } - } - else if (flavor == i386_NEW_THREAD_STATE && kernel_act) { - /* - * 386 mode. Set segment registers for flat - * 32-bit address space. - */ - saved_state->cs = KERNEL_CS; - saved_state->ss = KERNEL_DS; - saved_state->ds = KERNEL_DS; - saved_state->es = KERNEL_DS; - saved_state->fs = KERNEL_DS; - saved_state->gs = CPU_DATA_GS; - } - else { - /* - * User setting segment registers. - * Code and stack selectors have already been - * checked. Others will be reset by 'iret' - * if they are not valid. 
- */ - saved_state->cs = state->cs; - saved_state->ss = state->ss; - saved_state->ds = state->ds; - saved_state->es = state->es; - saved_state->fs = state->fs; - saved_state->gs = state->gs; - } + saved_state->isf.cs = state->isf.cs; + saved_state->isf.ss = state->isf.ss; + saved_state->fs = state->fs; + saved_state->gs = state->gs; + break; } - case i386_FLOAT_STATE: { - if (count < i386_old_FLOAT_STATE_COUNT) + case x86_FLOAT_STATE32: + { + if (count != x86_FLOAT_STATE32_COUNT) + return(KERN_INVALID_ARGUMENT); + + if (thread_is_64bit(thr_act)) return(KERN_INVALID_ARGUMENT); - if (count < i386_FLOAT_STATE_COUNT) - return fpu_set_state(thr_act,(struct i386_float_state*)tstate); - else return fpu_set_fxstate(thr_act,(struct i386_float_state*)tstate); + + return fpu_set_fxstate(thr_act, tstate); } - /* - * Temporary - replace by i386_io_map - */ - case i386_ISA_PORT_MAP_STATE: { - if (count < i386_ISA_PORT_MAP_STATE_COUNT) + case x86_FLOAT_STATE64: + { + if (count != x86_FLOAT_STATE64_COUNT) return(KERN_INVALID_ARGUMENT); - break; + if ( !thread_is_64bit(thr_act)) + return(KERN_INVALID_ARGUMENT); + + return fpu_set_fxstate(thr_act, tstate); } - case i386_V86_ASSIST_STATE: + case x86_FLOAT_STATE: { - register struct i386_v86_assist_state *state; - vm_offset_t int_table; - int int_count; + x86_float_state_t *state; + + if (count != x86_FLOAT_STATE_COUNT) + return(KERN_INVALID_ARGUMENT); - if (count < i386_V86_ASSIST_STATE_COUNT) - return KERN_INVALID_ARGUMENT; + state = (x86_float_state_t *)tstate; - state = (struct i386_v86_assist_state *) tstate; - int_table = state->int_table; - int_count = state->int_count; + if (state->fsh.flavor == x86_FLOAT_STATE64 && state->fsh.count == x86_FLOAT_STATE64_COUNT && + thread_is_64bit(thr_act)) { + return fpu_set_fxstate(thr_act, (thread_state_t)&state->ufs.fs64); + } + if (state->fsh.flavor == x86_FLOAT_STATE32 && state->fsh.count == x86_FLOAT_STATE32_COUNT && + !thread_is_64bit(thr_act)) { + return fpu_set_fxstate(thr_act, 
(thread_state_t)&state->ufs.fs32); + } + return(KERN_INVALID_ARGUMENT); + } - if (int_table >= VM_MAX_ADDRESS || - int_table + - int_count * sizeof(struct v86_interrupt_table) - > VM_MAX_ADDRESS) - return KERN_INVALID_ARGUMENT; - thr_act->machine.pcb->ims.v86s.int_table = int_table; - thr_act->machine.pcb->ims.v86s.int_count = int_count; - thr_act->machine.pcb->ims.v86s.flags = - USER_REGS(thr_act)->efl & (EFL_TF | EFL_IF); + case OLD_i386_THREAD_STATE: + case x86_THREAD_STATE32: + { + if (count != x86_THREAD_STATE32_COUNT) + return(KERN_INVALID_ARGUMENT); + + if (thread_is_64bit(thr_act)) + return(KERN_INVALID_ARGUMENT); + + return set_thread_state32(thr_act, (x86_thread_state32_t *)tstate); + } + + case x86_THREAD_STATE64: + { + if (count != x86_THREAD_STATE64_COUNT) + return(KERN_INVALID_ARGUMENT); + + if ( !thread_is_64bit(thr_act)) + return(KERN_INVALID_ARGUMENT); + + return set_thread_state64(thr_act, (x86_thread_state64_t *)tstate); + } + + case x86_THREAD_STATE: + { + x86_thread_state_t *state; + + if (count != x86_THREAD_STATE_COUNT) + return(KERN_INVALID_ARGUMENT); + + state = (x86_thread_state_t *)tstate; + + if (state->tsh.flavor == x86_THREAD_STATE64 && state->tsh.count == x86_THREAD_STATE64_COUNT && + thread_is_64bit(thr_act)) { + return set_thread_state64(thr_act, &state->uts.ts64); + } else if (state->tsh.flavor == x86_THREAD_STATE32 && state->tsh.count == x86_THREAD_STATE32_COUNT && + !thread_is_64bit(thr_act)) { + return set_thread_state32(thr_act, &state->uts.ts32); + } else + return(KERN_INVALID_ARGUMENT); + break; } + case x86_DEBUG_STATE32: + { + x86_debug_state32_t *state; + kern_return_t ret; + + if (thread_is_64bit(thr_act)) + return(KERN_INVALID_ARGUMENT); + + state = (x86_debug_state32_t *)tstate; + + ret = set_debug_state32(thr_act, state); - case i386_THREAD_STATE: { - struct i386_saved_state *saved_state; - i386_thread_state_t *state25; - - saved_state = USER_REGS(thr_act); - state25 = (i386_thread_state_t *)tstate; - - 
saved_state->eax = state25->eax; - saved_state->ebx = state25->ebx; - saved_state->ecx = state25->ecx; - saved_state->edx = state25->edx; - saved_state->edi = state25->edi; - saved_state->esi = state25->esi; - saved_state->ebp = state25->ebp; - saved_state->uesp = state25->esp; - saved_state->efl = (state25->eflags & ~EFL_USER_CLEAR) - | EFL_USER_SET; - saved_state->eip = state25->eip; - saved_state->cs = USER_CS; /* FIXME? */ - saved_state->ss = USER_DS; - saved_state->ds = USER_DS; - saved_state->es = USER_DS; - saved_state->fs = state25->fs; - saved_state->gs = state25->gs; + return ret; } - break; + case x86_DEBUG_STATE64: + { + x86_debug_state64_t *state; + kern_return_t ret; - default: + if (!thread_is_64bit(thr_act)) + return(KERN_INVALID_ARGUMENT); + + state = (x86_debug_state64_t *)tstate; + + ret = set_debug_state64(thr_act, state); + + return ret; + } + case x86_DEBUG_STATE: + { + x86_debug_state_t *state; + kern_return_t ret = KERN_INVALID_ARGUMENT; + + if (count != x86_DEBUG_STATE_COUNT) + return (KERN_INVALID_ARGUMENT); + + state = (x86_debug_state_t *)tstate; + if (state->dsh.flavor == x86_DEBUG_STATE64 && + state->dsh.count == x86_DEBUG_STATE64_COUNT && + thread_is_64bit(thr_act)) { + ret = set_debug_state64(thr_act, &state->uds.ds64); + } + else + if (state->dsh.flavor == x86_DEBUG_STATE32 && + state->dsh.count == x86_DEBUG_STATE32_COUNT && + !thread_is_64bit(thr_act)) { + ret = set_debug_state32(thr_act, &state->uds.ds32); + } + return ret; + } + default: return(KERN_INVALID_ARGUMENT); } return(KERN_SUCCESS); } + + /* * thread_getstatus: * * Get the status of the specified thread. 
*/ - kern_return_t machine_thread_get_state( thread_t thr_act, @@ -645,227 +1141,300 @@ machine_thread_get_state( { switch (flavor) { - case i386_SAVED_STATE: + case THREAD_STATE_FLAVOR_LIST: + { + if (*count < 3) + return (KERN_INVALID_ARGUMENT); + + tstate[0] = i386_THREAD_STATE; + tstate[1] = i386_FLOAT_STATE; + tstate[2] = i386_EXCEPTION_STATE; + + *count = 3; + break; + } + + case THREAD_STATE_FLAVOR_LIST_NEW: + { + if (*count < 4) + return (KERN_INVALID_ARGUMENT); + + tstate[0] = x86_THREAD_STATE; + tstate[1] = x86_FLOAT_STATE; + tstate[2] = x86_EXCEPTION_STATE; + tstate[3] = x86_DEBUG_STATE; + + *count = 4; + break; + } + + case x86_SAVED_STATE32: { - register struct i386_saved_state *state; - register struct i386_saved_state *saved_state; + x86_saved_state32_t *state; + x86_saved_state32_t *saved_state; - if (*count < i386_SAVED_STATE_COUNT) - return(KERN_INVALID_ARGUMENT); + if (*count < x86_SAVED_STATE32_COUNT) + return(KERN_INVALID_ARGUMENT); - state = (struct i386_saved_state *) tstate; - saved_state = USER_REGS(thr_act); + state = (x86_saved_state32_t *) tstate; + saved_state = USER_REGS32(thr_act); /* * First, copy everything: */ *state = *saved_state; + state->ds = saved_state->ds & 0xffff; + state->es = saved_state->es & 0xffff; + state->fs = saved_state->fs & 0xffff; + state->gs = saved_state->gs & 0xffff; - if (saved_state->efl & EFL_VM) { - /* - * V8086 mode. - */ - state->ds = saved_state->v86_segs.v86_ds & 0xffff; - state->es = saved_state->v86_segs.v86_es & 0xffff; - state->fs = saved_state->v86_segs.v86_fs & 0xffff; - state->gs = saved_state->v86_segs.v86_gs & 0xffff; - - if (thr_act->machine.pcb->ims.v86s.int_table) { - /* - * Hardware assist on - */ - if ((thr_act->machine.pcb->ims.v86s.flags & - (EFL_IF|V86_IF_PENDING)) == 0) - state->efl &= ~EFL_IF; - } - } - else { - /* - * 386 mode. 
- */ - state->ds = saved_state->ds & 0xffff; - state->es = saved_state->es & 0xffff; - state->fs = saved_state->fs & 0xffff; - state->gs = saved_state->gs & 0xffff; - } - *count = i386_SAVED_STATE_COUNT; + *count = x86_SAVED_STATE32_COUNT; break; } - case i386_NEW_THREAD_STATE: - case i386_REGS_SEGS_STATE: + case x86_SAVED_STATE64: { - register struct i386_new_thread_state *state; - register struct i386_saved_state *saved_state; + x86_saved_state64_t *state; + x86_saved_state64_t *saved_state; - if (*count < i386_NEW_THREAD_STATE_COUNT) - return(KERN_INVALID_ARGUMENT); + if (*count < x86_SAVED_STATE64_COUNT) + return(KERN_INVALID_ARGUMENT); - state = (struct i386_new_thread_state *) tstate; - saved_state = USER_REGS(thr_act); + state = (x86_saved_state64_t *)tstate; + saved_state = USER_REGS64(thr_act); /* - * General registers. + * First, copy everything: */ - state->edi = saved_state->edi; - state->esi = saved_state->esi; - state->ebp = saved_state->ebp; - state->ebx = saved_state->ebx; - state->edx = saved_state->edx; - state->ecx = saved_state->ecx; - state->eax = saved_state->eax; - state->eip = saved_state->eip; - state->efl = saved_state->efl; - state->uesp = saved_state->uesp; - - state->cs = saved_state->cs; - state->ss = saved_state->ss; - if (saved_state->efl & EFL_VM) { - /* - * V8086 mode. - */ - state->ds = saved_state->v86_segs.v86_ds & 0xffff; - state->es = saved_state->v86_segs.v86_es & 0xffff; - state->fs = saved_state->v86_segs.v86_fs & 0xffff; - state->gs = saved_state->v86_segs.v86_gs & 0xffff; - - if (thr_act->machine.pcb->ims.v86s.int_table) { - /* - * Hardware assist on - */ - if ((thr_act->machine.pcb->ims.v86s.flags & - (EFL_IF|V86_IF_PENDING)) == 0) - state->efl &= ~EFL_IF; - } - } - else { - /* - * 386 mode. 
- */ - state->ds = saved_state->ds & 0xffff; - state->es = saved_state->es & 0xffff; - state->fs = saved_state->fs & 0xffff; - state->gs = saved_state->gs & 0xffff; - } - *count = i386_NEW_THREAD_STATE_COUNT; + *state = *saved_state; + state->fs = saved_state->fs & 0xffff; + state->gs = saved_state->gs & 0xffff; + + *count = x86_SAVED_STATE64_COUNT; break; } - case THREAD_SYSCALL_STATE: + case x86_FLOAT_STATE32: { - register struct thread_syscall_state *state; - register struct i386_saved_state *saved_state = USER_REGS(thr_act); - - state = (struct thread_syscall_state *) tstate; - state->eax = saved_state->eax; - state->edx = saved_state->edx; - state->efl = saved_state->efl; - state->eip = saved_state->eip; - state->esp = saved_state->uesp; - *count = i386_THREAD_SYSCALL_STATE_COUNT; - break; + if (*count < x86_FLOAT_STATE32_COUNT) + return(KERN_INVALID_ARGUMENT); + + if (thread_is_64bit(thr_act)) + return(KERN_INVALID_ARGUMENT); + + *count = x86_FLOAT_STATE32_COUNT; + + return fpu_get_fxstate(thr_act, tstate); } - case THREAD_STATE_FLAVOR_LIST: - if (*count < 5) - return (KERN_INVALID_ARGUMENT); - tstate[0] = i386_NEW_THREAD_STATE; - tstate[1] = i386_FLOAT_STATE; - tstate[2] = i386_ISA_PORT_MAP_STATE; - tstate[3] = i386_V86_ASSIST_STATE; - tstate[4] = THREAD_SYSCALL_STATE; - *count = 5; + case x86_FLOAT_STATE64: + { + if (*count < x86_FLOAT_STATE64_COUNT) + return(KERN_INVALID_ARGUMENT); + + if ( !thread_is_64bit(thr_act)) + return(KERN_INVALID_ARGUMENT); + + *count = x86_FLOAT_STATE64_COUNT; + + return fpu_get_fxstate(thr_act, tstate); + } + + case x86_FLOAT_STATE: + { + x86_float_state_t *state; + kern_return_t kret; + + if (*count < x86_FLOAT_STATE_COUNT) + return(KERN_INVALID_ARGUMENT); + + state = (x86_float_state_t *)tstate; + + /* + * no need to bzero... 
currently + * x86_FLOAT_STATE64_COUNT == x86_FLOAT_STATE32_COUNT + */ + if (thread_is_64bit(thr_act)) { + state->fsh.flavor = x86_FLOAT_STATE64; + state->fsh.count = x86_FLOAT_STATE64_COUNT; + + kret = fpu_get_fxstate(thr_act, (thread_state_t)&state->ufs.fs64); + } else { + state->fsh.flavor = x86_FLOAT_STATE32; + state->fsh.count = x86_FLOAT_STATE32_COUNT; + + kret = fpu_get_fxstate(thr_act, (thread_state_t)&state->ufs.fs32); + } + *count = x86_FLOAT_STATE_COUNT; + + return(kret); + } + + + case OLD_i386_THREAD_STATE: + case x86_THREAD_STATE32: + { + if (*count < x86_THREAD_STATE32_COUNT) + return(KERN_INVALID_ARGUMENT); + + if (thread_is_64bit(thr_act)) + return(KERN_INVALID_ARGUMENT); + + *count = x86_THREAD_STATE32_COUNT; + + get_thread_state32(thr_act, (x86_thread_state32_t *)tstate); break; + } - case i386_FLOAT_STATE: { - if (*count < i386_old_FLOAT_STATE_COUNT) + case x86_THREAD_STATE64: + { + if (*count < x86_THREAD_STATE64_COUNT) return(KERN_INVALID_ARGUMENT); - if (*count< i386_FLOAT_STATE_COUNT) { - *count = i386_old_FLOAT_STATE_COUNT; - return fpu_get_state(thr_act,(struct i386_float_state *)tstate); - } else { - *count = i386_FLOAT_STATE_COUNT; - return fpu_get_fxstate(thr_act,(struct i386_float_state *)tstate); - } + + if ( !thread_is_64bit(thr_act)) + return(KERN_INVALID_ARGUMENT); + + *count = x86_THREAD_STATE64_COUNT; + + get_thread_state64(thr_act, (x86_thread_state64_t *)tstate); + break; } - /* - * Temporary - replace by i386_io_map - */ - case i386_ISA_PORT_MAP_STATE: { - register struct i386_isa_port_map_state *state; - register iopb_tss_t tss; + case x86_THREAD_STATE: + { + x86_thread_state_t *state; - if (*count < i386_ISA_PORT_MAP_STATE_COUNT) + if (*count < x86_THREAD_STATE_COUNT) return(KERN_INVALID_ARGUMENT); - state = (struct i386_isa_port_map_state *) tstate; - tss = thr_act->machine.pcb->ims.io_tss; + state = (x86_thread_state_t *)tstate; - if (tss == 0) { - unsigned int i; + bzero((char *)state, sizeof(x86_thread_state_t)); - /* - * 
The thread has no ktss, so no IO permissions. - */ + if (thread_is_64bit(thr_act)) { + state->tsh.flavor = x86_THREAD_STATE64; + state->tsh.count = x86_THREAD_STATE64_COUNT; - for (i = 0; i < sizeof state->pm; i++) - state->pm[i] = 0xff; + get_thread_state64(thr_act, &state->uts.ts64); } else { - /* - * The thread has its own ktss. - */ + state->tsh.flavor = x86_THREAD_STATE32; + state->tsh.count = x86_THREAD_STATE32_COUNT; - bcopy((char *) tss->bitmap, - (char *) state->pm, - sizeof state->pm); + get_thread_state32(thr_act, &state->uts.ts32); } + *count = x86_THREAD_STATE_COUNT; + + break; + } + + + case x86_EXCEPTION_STATE32: + { + if (*count < x86_EXCEPTION_STATE32_COUNT) + return(KERN_INVALID_ARGUMENT); + + if (thread_is_64bit(thr_act)) + return(KERN_INVALID_ARGUMENT); + + *count = x86_EXCEPTION_STATE32_COUNT; - *count = i386_ISA_PORT_MAP_STATE_COUNT; + get_exception_state32(thr_act, (x86_exception_state32_t *)tstate); break; } - case i386_V86_ASSIST_STATE: + case x86_EXCEPTION_STATE64: { - register struct i386_v86_assist_state *state; + if (*count < x86_EXCEPTION_STATE64_COUNT) + return(KERN_INVALID_ARGUMENT); - if (*count < i386_V86_ASSIST_STATE_COUNT) - return KERN_INVALID_ARGUMENT; + if ( !thread_is_64bit(thr_act)) + return(KERN_INVALID_ARGUMENT); - state = (struct i386_v86_assist_state *) tstate; - state->int_table = thr_act->machine.pcb->ims.v86s.int_table; - state->int_count = thr_act->machine.pcb->ims.v86s.int_count; + *count = x86_EXCEPTION_STATE64_COUNT; - *count = i386_V86_ASSIST_STATE_COUNT; + get_exception_state64(thr_act, (x86_exception_state64_t *)tstate); break; } - case i386_THREAD_STATE: { - struct i386_saved_state *saved_state; - i386_thread_state_t *state; - - saved_state = USER_REGS(thr_act); - state = (i386_thread_state_t *)tstate; - - state->eax = saved_state->eax; - state->ebx = saved_state->ebx; - state->ecx = saved_state->ecx; - state->edx = saved_state->edx; - state->edi = saved_state->edi; - state->esi = saved_state->esi; - 
state->ebp = saved_state->ebp; - state->esp = saved_state->uesp; - state->eflags = saved_state->efl; - state->eip = saved_state->eip; - state->cs = saved_state->cs; - state->ss = saved_state->ss; - state->ds = saved_state->ds; - state->es = saved_state->es; - state->fs = saved_state->fs; - state->gs = saved_state->gs; + case x86_EXCEPTION_STATE: + { + x86_exception_state_t *state; + + if (*count < x86_EXCEPTION_STATE_COUNT) + return(KERN_INVALID_ARGUMENT); + + state = (x86_exception_state_t *)tstate; + + bzero((char *)state, sizeof(x86_exception_state_t)); + + if (thread_is_64bit(thr_act)) { + state->esh.flavor = x86_EXCEPTION_STATE64; + state->esh.count = x86_EXCEPTION_STATE64_COUNT; + + get_exception_state64(thr_act, &state->ues.es64); + } else { + state->esh.flavor = x86_EXCEPTION_STATE32; + state->esh.count = x86_EXCEPTION_STATE32_COUNT; + + get_exception_state32(thr_act, &state->ues.es32); + } + *count = x86_EXCEPTION_STATE_COUNT; + + break; + } + case x86_DEBUG_STATE32: + { + if (*count < x86_DEBUG_STATE32_COUNT) + return(KERN_INVALID_ARGUMENT); + + if (thread_is_64bit(thr_act)) + return(KERN_INVALID_ARGUMENT); + + get_debug_state32(thr_act, (x86_debug_state32_t *)tstate); + + *count = x86_DEBUG_STATE32_COUNT; + + break; + } + case x86_DEBUG_STATE64: + { + if (*count < x86_DEBUG_STATE64_COUNT) + return(KERN_INVALID_ARGUMENT); + + if (!thread_is_64bit(thr_act)) + return(KERN_INVALID_ARGUMENT); + + get_debug_state64(thr_act, (x86_debug_state64_t *)tstate); + + *count = x86_DEBUG_STATE64_COUNT; + break; } + case x86_DEBUG_STATE: + { + x86_debug_state_t *state; + + if (*count < x86_DEBUG_STATE_COUNT) + return(KERN_INVALID_ARGUMENT); + + state = (x86_debug_state_t *)tstate; + + bzero(state, sizeof *state); + + if (thread_is_64bit(thr_act)) { + state->dsh.flavor = x86_DEBUG_STATE64; + state->dsh.count = x86_DEBUG_STATE64_COUNT; + + get_debug_state64(thr_act, &state->uds.ds64); + } else { + state->dsh.flavor = x86_DEBUG_STATE32; + state->dsh.count = 
x86_DEBUG_STATE32_COUNT; + get_debug_state32(thr_act, &state->uds.ds32); + } + *count = x86_DEBUG_STATE_COUNT; + break; + } default: return(KERN_INVALID_ARGUMENT); } @@ -873,43 +1442,211 @@ machine_thread_get_state( return(KERN_SUCCESS); } +kern_return_t +machine_thread_get_kern_state( + thread_t thread, + thread_flavor_t flavor, + thread_state_t tstate, + mach_msg_type_number_t *count) +{ + + /* + * This works only for an interrupted kernel thread + */ + if (thread != current_thread() || current_cpu_datap()->cpu_int_state == NULL) + return KERN_FAILURE; + + switch(flavor) { + + case x86_THREAD_STATE32: + { + + x86_thread_state32_t *state; + x86_saved_state32_t *saved_state; + + if (*count < x86_THREAD_STATE32_COUNT) + return(KERN_INVALID_ARGUMENT); + + state = (x86_thread_state32_t *)tstate; + + assert(is_saved_state32(current_cpu_datap()->cpu_int_state)); + saved_state = saved_state32(current_cpu_datap()->cpu_int_state); + /* + * General registers. + */ + state->eax = saved_state->eax; + state->ebx = saved_state->ebx; + state->ecx = saved_state->ecx; + state->edx = saved_state->edx; + state->edi = saved_state->edi; + state->esi = saved_state->esi; + state->ebp = saved_state->ebp; + state->esp = saved_state->uesp; + state->eflags = saved_state->efl; + state->eip = saved_state->eip; + state->cs = saved_state->cs; + state->ss = saved_state->ss; + state->ds = saved_state->ds & 0xffff; + state->es = saved_state->es & 0xffff; + state->fs = saved_state->fs & 0xffff; + state->gs = saved_state->gs & 0xffff; + + *count = x86_THREAD_STATE32_COUNT; + + return KERN_SUCCESS; + } + break; // for completeness + + case x86_THREAD_STATE: + { + // wrap a 32 bit thread state into a 32/64bit clean thread state + x86_thread_state_t *state; + x86_saved_state32_t *saved_state; + + if(*count < x86_THREAD_STATE_COUNT) + return (KERN_INVALID_ARGUMENT); + + state = (x86_thread_state_t *)tstate; + assert(is_saved_state32(current_cpu_datap()->cpu_int_state)); + saved_state = 
saved_state32(current_cpu_datap()->cpu_int_state); + + state->tsh.flavor = x86_THREAD_STATE32; + state->tsh.count = x86_THREAD_STATE32_COUNT; + + /* + * General registers. + */ + + state->uts.ts32.eax = saved_state->eax; + state->uts.ts32.ebx = saved_state->ebx; + state->uts.ts32.ecx = saved_state->ecx; + state->uts.ts32.edx = saved_state->edx; + state->uts.ts32.edi = saved_state->edi; + state->uts.ts32.esi = saved_state->esi; + state->uts.ts32.ebp = saved_state->ebp; + state->uts.ts32.esp = saved_state->uesp; + state->uts.ts32.eflags = saved_state->efl; + state->uts.ts32.eip = saved_state->eip; + state->uts.ts32.cs = saved_state->cs; + state->uts.ts32.ss = saved_state->ss; + state->uts.ts32.ds = saved_state->ds & 0xffff; + state->uts.ts32.es = saved_state->es & 0xffff; + state->uts.ts32.fs = saved_state->fs & 0xffff; + state->uts.ts32.gs = saved_state->gs & 0xffff; + + *count = x86_THREAD_STATE_COUNT; + return KERN_SUCCESS; + } + break; + } + return KERN_FAILURE; +} + + /* * Initialize the machine-dependent state for a new thread. */ kern_return_t machine_thread_create( thread_t thread, - __unused task_t task) + task_t task) { - pcb_t pcb = &thread->machine.xxx_pcb; + pcb_t pcb = &thread->machine.xxx_pcb; + struct real_descriptor *ldtp; + pmap_paddr_t paddr; + x86_saved_state_t *iss; - thread->machine.pcb = pcb; + inval_copy_windows(thread); - simple_lock_init(&pcb->lock, 0); + thread->machine.physwindow_pte = 0; + thread->machine.physwindow_busy = 0; - /* - * Guarantee that the bootstrapped thread will be in user - * mode. 
- */ - pcb->iss.cs = USER_CS; - pcb->iss.ss = USER_DS; - pcb->iss.ds = USER_DS; - pcb->iss.es = USER_DS; - pcb->iss.fs = USER_DS; - pcb->iss.gs = USER_DS; - pcb->iss.efl = EFL_USER_SET; - { - struct real_descriptor *ldtp; - ldtp = (struct real_descriptor *)ldt; - pcb->cthread_desc = ldtp[sel_idx(USER_DS)]; - pcb->uldt_desc = ldtp[sel_idx(USER_DS)]; - pcb->uldt_selector = 0; + if (task_has_64BitAddr(task)) { + x86_sframe64_t *sf64; + + sf64 = (x86_sframe64_t *)zalloc(iss_zone64); + + if (sf64 == NULL) + panic("iss_zone64"); + pcb->sf = (void *)sf64; + + bzero((char *)sf64, sizeof(x86_sframe64_t)); + + iss = (x86_saved_state_t *) &sf64->ssf; + iss->flavor = x86_SAVED_STATE64; + /* + * Guarantee that the bootstrapped thread will be in user + * mode. + */ + iss->ss_64.isf.rflags = EFL_USER_SET; + iss->ss_64.isf.cs = USER64_CS; + iss->ss_64.isf.ss = USER_DS; + iss->ss_64.fs = USER_DS; + iss->ss_64.gs = USER_DS; + } else { + if (cpu_mode_is64bit()) { + x86_sframe_compat32_t *sfc32; + + sfc32 = (x86_sframe_compat32_t *)zalloc(iss_zone32); + if (sfc32 == NULL) + panic("iss_zone32"); + pcb->sf = (void *)sfc32; + + bzero((char *)sfc32, sizeof(x86_sframe_compat32_t)); + + iss = (x86_saved_state_t *) &sfc32->ssf.iss32; + iss->flavor = x86_SAVED_STATE32; +#if DEBUG + { + x86_saved_state_compat32_t *xssc; + + xssc = (x86_saved_state_compat32_t *) iss; + xssc->pad_for_16byte_alignment[0] = 0x64326432; + xssc->pad_for_16byte_alignment[1] = 0x64326432; + } +#endif + } else { + x86_sframe32_t *sf32; + + sf32 = (x86_sframe32_t *)zalloc(iss_zone32); + + if (sf32 == NULL) + panic("iss_zone32"); + pcb->sf = (void *)sf32; + + bzero((char *)sf32, sizeof(x86_sframe32_t)); + + iss = (x86_saved_state_t *) &sf32->ssf; + iss->flavor = x86_SAVED_STATE32; + } + /* + * Guarantee that the bootstrapped thread will be in user + * mode. 
+ */ + iss->ss_32.cs = USER_CS; + iss->ss_32.ss = USER_DS; + iss->ss_32.ds = USER_DS; + iss->ss_32.es = USER_DS; + iss->ss_32.fs = USER_DS; + iss->ss_32.gs = USER_DS; + iss->ss_32.efl = EFL_USER_SET; } + pcb->iss = iss; - /* - * Allocate a kernel stack per thread. - */ - stack_alloc(thread); + thread->machine.pcb = pcb; + simple_lock_init(&pcb->lock, 0); + + ldtp = (struct real_descriptor *)pmap_index_to_virt(HIGH_FIXED_LDT_BEGIN); + pcb->cthread_desc = ldtp[sel_idx(USER_DS)]; + pcb->uldt_desc = ldtp[sel_idx(USER_DS)]; + pcb->uldt_selector = 0; + + pcb->iss_pte0 = (uint64_t)pte_kernel_rw(kvtophys((vm_offset_t)pcb->iss)); + + if (0 == (paddr = pa_to_pte(kvtophys((vm_offset_t)(pcb->iss) + PAGE_SIZE)))) + pcb->iss_pte1 = INTEL_PTE_INVALID; + else + pcb->iss_pte1 = (uint64_t)pte_kernel_rw(paddr); return(KERN_SUCCESS); } @@ -925,15 +1662,47 @@ machine_thread_destroy( assert(pcb); - if (pcb->ims.io_tss != 0) - iopb_destroy(pcb->ims.io_tss); - if (pcb->ims.ifps != 0) - fpu_free(pcb->ims.ifps); - if (pcb->ims.ldt != 0) - user_ldt_free(pcb->ims.ldt); + if (pcb->ifps != 0) + fpu_free(pcb->ifps); + if (pcb->sf != 0) { + if (thread_is_64bit(thread)) + zfree(iss_zone64, pcb->sf); + else + zfree(iss_zone32, pcb->sf); + pcb->sf = 0; + } + if (pcb->ids) { + if (thread_is_64bit(thread)) + zfree(ids_zone64, pcb->ids); + else + zfree(ids_zone32, pcb->ids); + } thread->machine.pcb = (pcb_t)0; + +} + +void +machine_thread_switch_addrmode(thread_t thread, int oldmode_is64bit) +{ + register pcb_t pcb = thread->machine.pcb; + + assert(pcb); + + if (pcb->sf != 0) { + if (oldmode_is64bit) + zfree(iss_zone64, pcb->sf); + else + zfree(iss_zone32, pcb->sf); + } + machine_thread_create(thread, thread->task); + + /* If we're switching ourselves, reset the pcb addresses etc. 
*/ + if (thread == current_thread()) + act_machine_switch_pcb(thread); } + + /* * This is used to set the current thr_act/thread * when starting up a new processor @@ -941,17 +1710,23 @@ machine_thread_destroy( void machine_set_current_thread( thread_t thread ) { - mp_disable_preemption(); - - current_cpu_datap()->cpu_active_thread = thread; - current_cpu_datap()->cpu_active_kloaded = THREAD_NULL; - - mp_enable_preemption(); + current_cpu_datap()->cpu_active_thread = thread; } +/* + * This is called when a task is terminated. + */ void machine_thread_terminate_self(void) { + task_t self_task = current_task(); + if (self_task) { + user_ldt_t user_ldt = self_task->i386_ldt; + if (user_ldt != 0) { + self_task->i386_ldt = 0; + user_ldt_free(user_ldt); + } + } } void @@ -982,6 +1757,37 @@ act_machine_return(int code) void machine_thread_init(void) { + if (cpu_mode_is64bit()) { + iss_zone64 = zinit(sizeof(x86_sframe64_t), + THREAD_MAX * sizeof(x86_sframe64_t), + THREAD_CHUNK * sizeof(x86_sframe64_t), + "x86_64 saved state"); + + assert(sizeof(x86_sframe_compat32_t) % 16 == 0); + iss_zone32 = zinit(sizeof(x86_sframe_compat32_t), + THREAD_MAX * sizeof(x86_sframe_compat32_t), + THREAD_CHUNK * sizeof(x86_sframe_compat32_t), + "x86_32 saved state"); + + ids_zone32 = zinit(sizeof(x86_debug_state32_t), + THREAD_MAX * (sizeof(x86_debug_state32_t)), + THREAD_CHUNK * (sizeof(x86_debug_state32_t)), + "x86_32 debug state"); + ids_zone64 = zinit(sizeof(x86_debug_state64_t), + THREAD_MAX * sizeof(x86_debug_state64_t), + THREAD_CHUNK * sizeof(x86_debug_state64_t), + "x86_64 debug state"); + + } else { + iss_zone32 = zinit(sizeof(x86_sframe32_t), + THREAD_MAX * sizeof(x86_sframe32_t), + THREAD_CHUNK * sizeof(x86_sframe32_t), + "x86 saved state"); + ids_zone32 = zinit(sizeof(x86_debug_state32_t), + THREAD_MAX * (sizeof(x86_debug_state32_t)), + THREAD_CHUNK * (sizeof(x86_debug_state32_t)), + "x86 debug state"); + } fpu_module_init(); iopb_init(); } @@ -1020,13 +1826,30 @@ 
dump_handlers(thread_t thr_act) void dump_regs(thread_t thr_act) { - if (thr_act->machine.pcb) { - register struct i386_saved_state *ssp = USER_REGS(thr_act); - /* Print out user register state */ + if (thr_act->machine.pcb == NULL) + return; + + if (thread_is_64bit(thr_act)) { + x86_saved_state64_t *ssp; + + ssp = USER_REGS64(thr_act); + + panic("dump_regs: 64bit tasks not yet supported"); + + } else { + x86_saved_state32_t *ssp; + + ssp = USER_REGS32(thr_act); + + /* + * Print out user register state + */ printf("\tRegs:\tedi=%x esi=%x ebp=%x ebx=%x edx=%x\n", ssp->edi, ssp->esi, ssp->ebp, ssp->ebx, ssp->edx); + printf("\t\tecx=%x eax=%x eip=%x efl=%x uesp=%x\n", ssp->ecx, ssp->eax, ssp->eip, ssp->efl, ssp->uesp); + printf("\t\tcs=%x ss=%x\n", ssp->cs, ssp->ss); } } @@ -1062,14 +1885,24 @@ dump_act(thread_t thr_act) user_addr_t get_useraddr(void) { - thread_t thr_act = current_thread(); - if (thr_act->machine.pcb) - return(thr_act->machine.pcb->iss.eip); - else - return(0); + if (thr_act->machine.pcb == NULL) + return (0); + + if (thread_is_64bit(thr_act)) { + x86_saved_state64_t *iss64; + + iss64 = USER_REGS64(thr_act); + + return(iss64->isf.rip); + } else { + x86_saved_state32_t *iss32; + iss32 = USER_REGS32(thr_act); + + return(iss32->eip); + } } /* @@ -1079,16 +1912,17 @@ get_useraddr(void) vm_offset_t machine_stack_detach(thread_t thread) { - vm_offset_t stack; + vm_offset_t stack; - KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_SCHED,MACH_STACK_DETACH), - thread, thread->priority, - thread->sched_pri, 0, - 0); + KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_SCHED, MACH_STACK_DETACH), + thread, thread->priority, + thread->sched_pri, 0, + 0); - stack = thread->kernel_stack; - thread->kernel_stack = 0; - return(stack); + stack = thread->kernel_stack; + thread->kernel_stack = 0; + + return (stack); } /* @@ -1100,23 +1934,21 @@ machine_stack_attach( thread_t thread, vm_offset_t stack) { - struct i386_kernel_state *statep; - - 
KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_SCHED,MACH_STACK_ATTACH), - thread, thread->priority, - thread->sched_pri, 0, 0); + struct x86_kernel_state32 *statep; - assert(stack); - statep = STACK_IKS(stack); - thread->kernel_stack = stack; + KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_SCHED, MACH_STACK_ATTACH), + thread, thread->priority, + thread->sched_pri, 0, 0); - statep->k_eip = (unsigned long) Thread_continue; - statep->k_ebx = (unsigned long) thread_continue; - statep->k_esp = (unsigned long) STACK_IEL(stack); + assert(stack); + thread->kernel_stack = stack; - STACK_IEL(stack)->saved_state = &thread->machine.pcb->iss; + statep = STACK_IKS(stack); + statep->k_eip = (unsigned long) Thread_continue; + statep->k_ebx = (unsigned long) thread_continue; + statep->k_esp = (unsigned long) STACK_IEL(stack); - return; + return; } /* @@ -1127,97 +1959,180 @@ void machine_stack_handoff(thread_t old, thread_t new) { - vm_offset_t stack; + vm_offset_t stack; - KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_SCHED,MACH_STACK_HANDOFF), - thread, thread->priority, - thread->sched_pri, 0, 0); + assert(new); + assert(old); - assert(new); - assert(old); + stack = old->kernel_stack; + if (stack == old->reserved_stack) { + assert(new->reserved_stack); + old->reserved_stack = new->reserved_stack; + new->reserved_stack = stack; + } + old->kernel_stack = 0; + /* + * A full call to machine_stack_attach() is unnecessary + * because old stack is already initialized.
+ */ + new->kernel_stack = stack; - stack = machine_stack_detach(old); - machine_stack_attach(new, stack); + fpu_save_context(old); - PMAP_SWITCH_CONTEXT(old->task, new->task, cpu_number()); + old->machine.specFlags &= ~OnProc; + new->machine.specFlags |= OnProc; - KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED,MACH_STACK_HANDOFF) | DBG_FUNC_NONE, - (int)old, (int)new, old->sched_pri, new->sched_pri, 0); + PMAP_SWITCH_CONTEXT(old, new, cpu_number()); + act_machine_switch_pcb(new); - machine_set_current_thread(new); + KERNEL_DEBUG_CONSTANT( + MACHDBG_CODE(DBG_MACH_SCHED, MACH_STACK_HANDOFF)|DBG_FUNC_NONE, + old->reason, (int)new, old->sched_pri, new->sched_pri, 0); - current_cpu_datap()->cpu_active_stack = new->kernel_stack; + machine_set_current_thread(new); - return; + return; } -struct i386_act_context { - struct i386_saved_state ss; - struct i386_float_state fs; + + + +struct x86_act_context32 { + x86_saved_state32_t ss; + x86_float_state32_t fs; + x86_debug_state32_t ds; +}; + +struct x86_act_context64 { + x86_saved_state64_t ss; + x86_float_state64_t fs; + x86_debug_state64_t ds; }; + + void * act_thread_csave(void) { -struct i386_act_context *ic; -kern_return_t kret; -int val; + kern_return_t kret; + mach_msg_type_number_t val; + thread_t thr_act = current_thread(); + + if (thread_is_64bit(thr_act)) { + struct x86_act_context64 *ic64; - ic = (struct i386_act_context *)kalloc(sizeof(struct i386_act_context)); + ic64 = (struct x86_act_context64 *)kalloc(sizeof(struct x86_act_context64)); + + if (ic64 == (struct x86_act_context64 *)NULL) + return((void *)0); + + val = x86_SAVED_STATE64_COUNT; + kret = machine_thread_get_state(thr_act, x86_SAVED_STATE64, + (thread_state_t) &ic64->ss, &val); + if (kret != KERN_SUCCESS) { + kfree(ic64, sizeof(struct x86_act_context64)); + return((void *)0); + } + val = x86_FLOAT_STATE64_COUNT; + kret = machine_thread_get_state(thr_act, x86_FLOAT_STATE64, + (thread_state_t) &ic64->fs, &val); - if (ic == (struct i386_act_context 
*)NULL) - return((void *)0); + if (kret != KERN_SUCCESS) { + kfree(ic64, sizeof(struct x86_act_context64)); + return((void *)0); + } - val = i386_SAVED_STATE_COUNT; - kret = machine_thread_get_state(current_thread(), - i386_SAVED_STATE, - (thread_state_t) &ic->ss, + val = x86_DEBUG_STATE64_COUNT; + kret = machine_thread_get_state(thr_act, + x86_DEBUG_STATE64, + (thread_state_t)&ic64->ds, &val); if (kret != KERN_SUCCESS) { - kfree(ic,sizeof(struct i386_act_context)); - return((void *)0); + kfree(ic64, sizeof(struct x86_act_context64)); + return((void *)0); + } + return(ic64); + + } else { + struct x86_act_context32 *ic32; + + ic32 = (struct x86_act_context32 *)kalloc(sizeof(struct x86_act_context32)); + + if (ic32 == (struct x86_act_context32 *)NULL) + return((void *)0); + + val = x86_SAVED_STATE32_COUNT; + kret = machine_thread_get_state(thr_act, x86_SAVED_STATE32, + (thread_state_t) &ic32->ss, &val); + if (kret != KERN_SUCCESS) { + kfree(ic32, sizeof(struct x86_act_context32)); + return((void *)0); } - val = i386_FLOAT_STATE_COUNT; - kret = machine_thread_get_state(current_thread(), - i386_FLOAT_STATE, - (thread_state_t) &ic->fs, + val = x86_FLOAT_STATE32_COUNT; + kret = machine_thread_get_state(thr_act, x86_FLOAT_STATE32, + (thread_state_t) &ic32->fs, &val); + if (kret != KERN_SUCCESS) { + kfree(ic32, sizeof(struct x86_act_context32)); + return((void *)0); + } + + val = x86_DEBUG_STATE32_COUNT; + kret = machine_thread_get_state(thr_act, + x86_DEBUG_STATE32, + (thread_state_t)&ic32->ds, &val); if (kret != KERN_SUCCESS) { - kfree(ic,sizeof(struct i386_act_context)); - return((void *)0); + kfree(ic32, sizeof(struct x86_act_context32)); + return((void *)0); } - return(ic); + return(ic32); + } } + + void act_thread_catt(void *ctx) { -struct i386_act_context *ic; -kern_return_t kret; - - ic = (struct i386_act_context *)ctx; - - if (ic == (struct i386_act_context *)NULL) - return; - - kret = machine_thread_set_state(current_thread(), - i386_SAVED_STATE, - 
(thread_state_t) &ic->ss, - i386_SAVED_STATE_COUNT); - if (kret != KERN_SUCCESS) - goto out; - - kret = machine_thread_set_state(current_thread(), - i386_FLOAT_STATE, - (thread_state_t) &ic->fs, - i386_FLOAT_STATE_COUNT); - if (kret != KERN_SUCCESS) - goto out; -out: - kfree(ic,sizeof(struct i386_act_context)); + thread_t thr_act = current_thread(); + kern_return_t kret; + + if (ctx == (void *)NULL) + return; + + if (thread_is_64bit(thr_act)) { + struct x86_act_context64 *ic64; + + ic64 = (struct x86_act_context64 *)ctx; + + kret = machine_thread_set_state(thr_act, x86_SAVED_STATE64, + (thread_state_t) &ic64->ss, x86_SAVED_STATE64_COUNT); + if (kret == KERN_SUCCESS) { + machine_thread_set_state(thr_act, x86_FLOAT_STATE64, + (thread_state_t) &ic64->fs, x86_FLOAT_STATE64_COUNT); + } + kfree(ic64, sizeof(struct x86_act_context64)); + } else { + struct x86_act_context32 *ic32; + + ic32 = (struct x86_act_context32 *)ctx; + + kret = machine_thread_set_state(thr_act, x86_SAVED_STATE32, + (thread_state_t) &ic32->ss, x86_SAVED_STATE32_COUNT); + if (kret == KERN_SUCCESS) { + kret = machine_thread_set_state(thr_act, x86_FLOAT_STATE32, + (thread_state_t) &ic32->fs, x86_FLOAT_STATE32_COUNT); + if (kret == KERN_SUCCESS && thr_act->machine.pcb->ids) + machine_thread_set_state(thr_act, + x86_DEBUG_STATE32, + (thread_state_t)&ic32->ds, + x86_DEBUG_STATE32_COUNT); + } + kfree(ic32, sizeof(struct x86_act_context32)); + } } -void act_thread_cfree(void *ctx) + +void act_thread_cfree(__unused void *ctx) { - kfree(ctx,sizeof(struct i386_act_context)); + /* XXX - Unused */ } - diff --git a/osfmk/i386/perfmon.c b/osfmk/i386/perfmon.c index c23f7d831..bf8172d46 100644 --- a/osfmk/i386/perfmon.c +++ b/osfmk/i386/perfmon.c @@ -30,13 +30,24 @@ #include #include #include +#include -#ifdef DEBUG +#if DEBUG #define DBG(x...) kprintf(x) #else #define DBG(x...) 
#endif +decl_simple_lock_data(,pmc_lock) +static task_t pmc_owner = TASK_NULL; +static int pmc_thread_count = 0; + +/* PMC Facility Owner: + * TASK_NULL - no one owns it + * kernel_task - owned by pmc + * other task - owned by another task + */ + /* * Table of ESCRs and addresses associated with performance counters/CCCRs. * See Intel SDM Vol 3, Table 15-4 (section 15.9): @@ -274,24 +285,18 @@ pmc_p6_intr(void *state) (*pmc_table->ovf_func[id])(id, state); } -int -pmc_init(void) +void * +pmc_alloc(void) { int ret; - cpu_core_t *my_core; pmc_table_t *pmc_table; pmc_machine_t pmc_type; - my_core = cpu_core(); - assert(my_core); - pmc_type = _pmc_machine_type(); if (pmc_type == pmc_none) { - return KERN_FAILURE; + return NULL; } - pmc_table = (pmc_table_t *) my_core->pmc; - if (pmc_table == NULL) { ret = kmem_alloc(kernel_map, (void *) &pmc_table, sizeof(pmc_table_t)); if (ret != KERN_SUCCESS) @@ -315,18 +320,10 @@ pmc_init(void) default: break; } - if (!atomic_cmpxchg((uint32_t *) &my_core->pmc, - 0, (uint32_t) pmc_table)) { - kmem_free(kernel_map, - (vm_offset_t) pmc_table, sizeof(pmc_table_t)); - } - } - DBG("pmc_init() done for cpu %d my_core->pmc=0x%x type=%d\n", - cpu_number(), my_core->pmc, pmc_type); - - return KERN_SUCCESS; + return (void *) pmc_table; } + static inline pmc_table_t * pmc_table_valid(pmc_id_t id) { @@ -550,3 +547,81 @@ pmc_set_ovf_func(pmc_id_t id, pmc_ovf_func_t func) return KERN_SUCCESS; } + +int +pmc_acquire(task_t task) +{ + kern_return_t retval = KERN_SUCCESS; + + simple_lock(&pmc_lock); + + if(pmc_owner == task) { + DBG("pmc_acquire - " + "ACQUIRED: already owner\n"); + retval = KERN_SUCCESS; + /* already own it */ + } else if(pmc_owner == TASK_NULL) { /* no one owns it */ + pmc_owner = task; + pmc_thread_count = 0; + DBG("pmc_acquire - " + "ACQUIRED: no current owner - made new owner\n"); + retval = KERN_SUCCESS; + } else { /* someone already owns it */ + if(pmc_owner == kernel_task) { + if(pmc_thread_count == 0) { + /* kernel owns it 
but no threads using it */ + pmc_owner = task; + pmc_thread_count = 0; + DBG("pmc_acquire - " + "ACQUIRED: owned by kernel, no threads\n"); + retval = KERN_SUCCESS; + } else { + DBG("pmc_acquire - " + "DENIED: owned by kernel, in use\n"); + retval = KERN_RESOURCE_SHORTAGE; + } + } else { /* non-kernel owner */ + DBG("pmc_acquire - " + "DENIED: owned by another task\n"); + retval = KERN_RESOURCE_SHORTAGE; + } + } + + simple_unlock(&pmc_lock); + return retval; +} + +int +pmc_release(task_t task) +{ + kern_return_t retval = KERN_SUCCESS; + task_t old_pmc_owner; /* owner as observed while holding pmc_lock */ + + simple_lock(&pmc_lock); + old_pmc_owner = pmc_owner; /* read only after the lock is taken, so the snapshot cannot go stale */ + if(task != pmc_owner) { + retval = KERN_NO_ACCESS; + } else { + if(old_pmc_owner == kernel_task) { + if(pmc_thread_count>0) { + DBG("pmc_release - " + "NOT RELEASED: owned by kernel, in use\n"); + retval = KERN_NO_ACCESS; + } else { + DBG("pmc_release - " + "RELEASED: was owned by kernel\n"); + pmc_owner = TASK_NULL; + retval = KERN_SUCCESS; + } + } else { + DBG("pmc_release - " + "RELEASED: was owned by user\n"); + pmc_owner = TASK_NULL; + retval = KERN_SUCCESS; + } + } + + simple_unlock(&pmc_lock); + return retval; +} + diff --git a/osfmk/i386/perfmon.h b/osfmk/i386/perfmon.h index c8eae2a3f..3c2906b7c 100644 --- a/osfmk/i386/perfmon.h +++ b/osfmk/i386/perfmon.h @@ -283,7 +283,7 @@ typedef void (pmc_ovf_func_t)(pmc_id_t id, void *state); * In-kernel PMC access primitives: */ /* Generic: */ -extern int pmc_init(void); +extern void *pmc_alloc(void); extern int pmc_machine_type(pmc_machine_t *type); extern boolean_t pmc_is_reserved(pmc_id_t id); extern int pmc_reserve(pmc_id_t id); @@ -302,4 +302,7 @@ extern int pmc_escr_read(pmc_id_t id, pmc_escr_id_t esid, pmc_escr_t *escr); extern int pmc_escr_write(pmc_id_t id, pmc_escr_id_t esid, pmc_escr_t *escr); extern int pmc_set_ovf_func(pmc_id_t id, pmc_ovf_func_t *func); +extern int pmc_acquire(task_t); +extern int pmc_release(task_t); + #endif /* _I386_PERFMON_H_ */ diff --git a/osfmk/i386/phys.c b/osfmk/i386/phys.c index 
3676a4f95..76e502869 100644 --- a/osfmk/i386/phys.c +++ b/osfmk/i386/phys.c @@ -120,13 +120,17 @@ pmap_copy_part_page( vm_offset_t dst_offset, vm_size_t len) { - vm_offset_t src, dst; + pmap_paddr_t src, dst; + assert(psrc != vm_page_fictitious_addr); assert(pdst != vm_page_fictitious_addr); - src = (vm_offset_t)i386_ptob(psrc); - dst = (vm_offset_t)i386_ptob(pdst); - assert(((dst & PAGE_MASK) + dst_offset + len) <= PAGE_SIZE); - assert(((src & PAGE_MASK) + src_offset + len) <= PAGE_SIZE); + + src = i386_ptob(psrc); + dst = i386_ptob(pdst); + + assert((((uint32_t)dst & PAGE_MASK) + dst_offset + len) <= PAGE_SIZE); + assert((((uint32_t)src & PAGE_MASK) + src_offset + len) <= PAGE_SIZE); + bcopy_phys((addr64_t)src + (src_offset & INTEL_OFFMASK), (addr64_t)dst + (dst_offset & INTEL_OFFMASK), len); @@ -143,21 +147,24 @@ pmap_copy_part_lpage( vm_offset_t dst_offset, vm_size_t len) { - pt_entry_t *ptep; - thread_t thr_act = current_thread(); + mapwindow_t *map; assert(pdst != vm_page_fictitious_addr); - ptep = pmap_pte(thr_act->map->pmap, i386_ptob(pdst)); - if (0 == ptep) - panic("pmap_copy_part_lpage ptep"); - assert(((pdst & PAGE_MASK) + dst_offset + len) <= PAGE_SIZE); - if (*(pt_entry_t *) CM2) - panic("pmap_copy_part_lpage"); - *(int *) CM2 = INTEL_PTE_VALID | INTEL_PTE_RW | (*ptep & PG_FRAME) | - INTEL_PTE_REF | INTEL_PTE_MOD; - invlpg((unsigned int) CA2); - memcpy((void *) (CA2 + (dst_offset & INTEL_OFFMASK)), (void *) src, len); - *(pt_entry_t *) CM2 = 0; + assert((dst_offset + len) <= PAGE_SIZE); + + mp_disable_preemption(); + + map = pmap_get_mapwindow(INTEL_PTE_VALID | INTEL_PTE_RW | (i386_ptob(pdst) & PG_FRAME) | + INTEL_PTE_REF | INTEL_PTE_MOD); + if (map == 0) { + panic("pmap_copy_part_lpage"); + } + invlpg((uintptr_t)map->prv_CADDR); + + memcpy((void *) (map->prv_CADDR + (dst_offset & INTEL_OFFMASK)), (void *) src, len); + *map->prv_CMAP = 0; + + mp_enable_preemption(); } /* @@ -171,21 +178,24 @@ pmap_copy_part_rpage( vm_offset_t dst, vm_size_t len) { - 
pt_entry_t *ptep; - thread_t thr_act = current_thread(); + mapwindow_t *map; assert(psrc != vm_page_fictitious_addr); - ptep = pmap_pte(thr_act->map->pmap, i386_ptob(psrc)); - if (0 == ptep) - panic("pmap_copy_part_rpage ptep"); - assert(((psrc & PAGE_MASK) + src_offset + len) <= PAGE_SIZE); - if (*(pt_entry_t *) CM2) - panic("pmap_copy_part_rpage"); - *(pt_entry_t *) CM2 = INTEL_PTE_VALID | INTEL_PTE_RW | (*ptep & PG_FRAME) | - INTEL_PTE_REF; - invlpg((unsigned int) CA2); - memcpy((void *) dst, (void *) (CA2 + (src_offset & INTEL_OFFMASK)), len); - *(pt_entry_t *) CM2 = 0; + assert((src_offset + len) <= PAGE_SIZE); + + mp_disable_preemption(); + + map = pmap_get_mapwindow(INTEL_PTE_VALID | INTEL_PTE_RW | (i386_ptob(psrc) & PG_FRAME) | + INTEL_PTE_REF); + if (map == 0) { + panic("pmap_copy_part_rpage"); + } + invlpg((uintptr_t) map->prv_CADDR); + + memcpy((void *) dst, (void *) (map->prv_CADDR + (src_offset & INTEL_OFFMASK)), len); + *map->prv_CMAP = 0; + + mp_enable_preemption(); } /* @@ -193,19 +203,19 @@ pmap_copy_part_rpage( * * Convert a kernel virtual address to a physical address */ -vm_offset_t +addr64_t kvtophys( vm_offset_t addr) { pt_entry_t *ptep; pmap_paddr_t pa; - if ((ptep = pmap_pte(kernel_pmap, addr)) == PT_ENTRY_NULL) { - pa = 0; + if ((ptep = pmap_pte(kernel_pmap, (vm_map_offset_t)addr)) == PT_ENTRY_NULL) { + pa = 0; } else { - pa = pte_to_pa(*ptep) | (addr & INTEL_OFFMASK); + pa = pte_to_pa(*ptep) | (addr & INTEL_OFFMASK); } - if (0 == pa) - kprintf("kvtophys ret 0!\n"); - return (pa); + + return ((addr64_t)pa); } + diff --git a/osfmk/i386/pio.h b/osfmk/i386/pio.h index c8b3b4a19..28e59196b 100644 --- a/osfmk/i386/pio.h +++ b/osfmk/i386/pio.h @@ -53,6 +53,9 @@ #define I386_PIO_H #include +#if !MACH_ASSERT +#include +#else typedef unsigned short i386_ioport_t; /* read a longword */ @@ -109,51 +112,6 @@ extern void loutb( i386_ioport_t port, char * data, int count); +#endif /* !MACH_ASSERT */ -#if defined(__GNUC__) && (!MACH_ASSERT) -extern 
__inline__ unsigned long inl( - i386_ioport_t port) -{ - unsigned long datum; - __asm__ volatile("inl %1, %0" : "=a" (datum) : "d" (port)); - return(datum); -} - -extern __inline__ unsigned short inw( - i386_ioport_t port) -{ - unsigned short datum; - __asm__ volatile(".byte 0x66; inl %1, %0" : "=a" (datum) : "d" (port)); - return(datum); -} - -extern __inline__ unsigned char inb( - i386_ioport_t port) -{ - unsigned char datum; - __asm__ volatile("inb %1, %0" : "=a" (datum) : "d" (port)); - return(datum); -} - -extern __inline__ void outl( - i386_ioport_t port, - unsigned long datum) -{ - __asm__ volatile("outl %0, %1" : : "a" (datum), "d" (port)); -} - -extern __inline__ void outw( - i386_ioport_t port, - unsigned short datum) -{ - __asm__ volatile(".byte 0x66; outl %0, %1" : : "a" (datum), "d" (port)); -} - -extern __inline__ void outb( - i386_ioport_t port, - unsigned char datum) -{ - __asm__ volatile("outb %0, %1" : : "a" (datum), "d" (port)); -} -#endif /* defined(__GNUC__) && (!MACH_ASSERT) */ #endif /* I386_PIO_H */ diff --git a/osfmk/i386/pmCPU.c b/osfmk/i386/pmCPU.c new file mode 100644 index 000000000..aae97f4d5 --- /dev/null +++ b/osfmk/i386/pmCPU.c @@ -0,0 +1,467 @@ +/* + * Copyright (c) 2004-2006 Apple Computer, Inc. All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * The contents of this file constitute Original Code as defined in and + * are subject to the Apple Public Source License Version 1.1 (the + * "License"). You may not use this file except in compliance with the + * License. Please obtain a copy of the License at + * http://www.apple.com/publicsource and read it before using this file. 
+ * + * This Original Code and all software distributed under the License are + * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the + * License for the specific language governing rights and limitations + * under the License. + * + * @APPLE_LICENSE_HEADER_END@ + */ + +/* + * CPU-specific power management support. + * + * Implements the "wrappers" to the KEXT. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#if MACH_KDB +#include +#include +#include +#include +#include +#include +#include +#include +#endif + +extern int disableConsoleOutput; + +decl_simple_lock_data(,pm_init_lock); + +/* + * The following is set when the KEXT loads and initializes. + */ +pmDispatch_t *pmDispatch = NULL; + +/* + * Current power management states (for use until KEXT is loaded). + */ +static pmInitState_t pmInitState; + +/* + * Nap control variables: + */ +uint32_t napCtl = 0; /* Defaults to neither napping + nor halting */ +uint32_t forcenap = 0; /* Force nap (fn) boot-arg controls */ +uint32_t maxBusDelay = 0xFFFFFFFF; /* Maximum memory bus delay that + I/O devices can tolerate + before errors (nanoseconds) */ +uint32_t C4C2SnoopDelay = 0; /* C4 to C2 transition time - + time before a C4 system + can snoop (nanoseconds) */ + +/* + * We are being asked to set PState (sel). + */ +void +pmsCPUSet(uint32_t sel) +{ + if (pmDispatch != NULL && pmDispatch->pmsCPUSet != NULL) + (*pmDispatch->pmsCPUSet)(sel); + else + pmInitState.PState = sel; +} + +/* + * This code configures the initial step tables. It should be called after + * the timebase frequency is initialized. + * + * Note that this is not used in normal operation. It is strictly for + * debugging/testing purposes. 
+ */ +void +pmsCPUConf(void) +{ + + if (pmDispatch != NULL && pmDispatch->pmsCPUConf != NULL) + (*pmDispatch->pmsCPUConf)(); +} + +/* + * Machine-dependent initialization. + */ +void +pmsCPUMachineInit(void) +{ + /* + * Initialize some of the initial state to "uninitialized" until + * it gets set with something more useful. This allows the KEXT + * to determine if the initial value was actually set to something. + */ + pmInitState.PState = -1; + pmInitState.PLimit = -1; + + if (pmDispatch != NULL && pmDispatch->pmsCPUMachineInit != NULL) + (*pmDispatch->pmsCPUMachineInit)(); +} + +/* + * This function should be called once for each processor to force the + * processor to the correct initial voltage and frequency. + */ +void +pmsCPUInit(void) +{ + pmsCPUMachineInit(); + if (pmDispatch != NULL && pmDispatch->pmsCPUInit != NULL) + (*pmDispatch->pmsCPUInit)(); +} + +/* + * Broadcast a change to all processors including ourselves. + */ +void +pmsCPURun(uint32_t nstep) +{ + if (pmDispatch != NULL && pmDispatch->pmsCPURun != NULL) + (*pmDispatch->pmsCPURun)(nstep); +} + +/* + * Return the current state of a core. + */ +uint32_t +pmsCPUQuery(void) +{ + if (pmDispatch != NULL && pmDispatch->pmsCPUQuery != NULL) + return((*pmDispatch->pmsCPUQuery)()); + + /* + * Return a nonsense value; ~0U keeps the shift on an unsigned operand. + */ + return((~0U) << 16); +} + +/* + * Return the current state of the package. + */ +uint32_t +pmsCPUPackageQuery(void) +{ + if (pmDispatch != NULL && pmDispatch->pmsCPUPackageQuery != NULL) + return((*pmDispatch->pmsCPUPackageQuery)()); + + /* + * Return a nonsense value; ~0U keeps the shift on an unsigned operand. + */ + return((~0U) << 16); +} + +/* + * Force the CPU package to the lowest power level. This is a low-level + * interface meant to be called from the panic or debugger code to bring + * the CPU to a safe power level for unmanaged operation. + * + * Note that while this will bring an entire package to a safe level, it + * cannot affect other packages. 
As a general rule, this should be run on + * every core as part of entering the debugger or on the panic path. + */ +void +pmsCPUYellowFlag(void) +{ + if (pmDispatch != NULL && pmDispatch->pmsCPUYellowFlag != NULL) + (*pmDispatch->pmsCPUYellowFlag)(); +} + +/* + * Restore the CPU to the power state it was in before a yellow flag. + */ +void +pmsCPUGreenFlag(void) +{ + if (pmDispatch != NULL && pmDispatch->pmsCPUGreenFlag != NULL) + (*pmDispatch->pmsCPUGreenFlag)(); +} + +/* + * Load a new ratio/VID table. + * + * Note that this interface is specific to the Intel SpeedStep implementation. + * It is expected that this will only be called once to override the default + * ratio/VID table when the platform starts. + * + * Normally, the table will need to be replaced at the same time that the + * stepper program proper is replaced, as the PState indices from an old + * program may no longer be valid. When replacing the default program this + * should not be a problem as any new table will have at least two PState + * entries and the default program only references P0 and P1. + */ +kern_return_t +pmsCPULoadVIDTable(uint16_t *tablep, int nstates) +{ + if (pmDispatch != NULL && pmDispatch->pmsCPULoadVIDTable != NULL) + return((*pmDispatch->pmsCPULoadVIDTable)(tablep, nstates)); + else { + int i; + + if (nstates > MAX_PSTATES) + return(KERN_FAILURE); + + for (i = 0; i < nstates; i += 1) + pmInitState.VIDTable[i] = tablep[i]; + } + return(KERN_SUCCESS); +} + +/* + * Set the (global) PState limit. CPUs will not be permitted to run at + * a lower (more performant) PState than this. + */ +kern_return_t +pmsCPUSetPStateLimit(uint32_t limit) +{ + if (pmDispatch != NULL && pmDispatch->pmsCPUSetPStateLimit != NULL) + return((*pmDispatch->pmsCPUSetPStateLimit)(limit)); + + pmInitState.PLimit = limit; + return(KERN_SUCCESS); +} + +/* + * Initialize the Cstate change code. 
+ */ +void +power_management_init(void) +{ + uint32_t cpuModel; + uint32_t cpuFamily; + uint32_t xcpuid[4]; + + /* + * Initialize the lock for the KEXT initialization. + */ + simple_lock_init(&pm_init_lock, 0); + + /* + * XXX + * + * The following is a hack to disable power management on some systems + * until the KEXT is done. This is strictly temporary!!! + */ + do_cpuid(1, xcpuid); + cpuFamily = (xcpuid[eax] >> 8) & 0xf; + cpuModel = (xcpuid[eax] >> 4) & 0xf; + + if (cpuFamily != 0x6 || cpuModel < 0xe) + pmDispatch = NULL; + + if (pmDispatch != NULL && pmDispatch->cstateInit != NULL) + (*pmDispatch->cstateInit)(); +} + +/* + * This function will update the system nap policy. It should be called + * whenever conditions change: when the system is ready to begin napping + * and if something changes the rules (e.g. a sysctl altering the policy + * for debugging). + */ +void +machine_nap_policy(void) +{ + if (pmDispatch != NULL && pmDispatch->cstateNapPolicy != NULL) + napCtl = (*pmDispatch->cstateNapPolicy)(forcenap, napCtl); +} + +/* + * ACPI calls the following routine to set/update mwait hints. A table + * (possibly null) specifies the available Cstates and their hints, all + * other states are assumed to be invalid. ACPI may update available + * states to change the nap policy (for example, while AC power is + * available). With no KEXT handler registered, a table of more than Cnmax entries is rejected with KERN_FAILURE. + */ +kern_return_t +Cstate_table_set(Cstate_hint_t *tablep, unsigned int nstates) +{ + if (forcenap) + return(KERN_SUCCESS); + + if (pmDispatch != NULL && pmDispatch->cstateTableSet != NULL) + return((*pmDispatch->cstateTableSet)(tablep, nstates)); + else { + unsigned int i; + if (nstates > Cnmax) return(KERN_FAILURE); /* pmInitState.CStates[] holds only Cnmax entries */ + for (i = 0; i < nstates; i += 1) { + pmInitState.CStates[i].number = tablep[i].number; + pmInitState.CStates[i].hint = tablep[i].hint; + } + + pmInitState.CStatesCount = nstates; + } + return(KERN_SUCCESS); +} + +static inline void +sti(void) { + __asm__ volatile ( "sti" : : : "memory"); +} + +/* + * Called when the CPU is idle. 
It will choose the best C state to + * be in. + */ +void +machine_idle_cstate(void) +{ + if (pmDispatch != NULL && pmDispatch->cstateMachineIdle != NULL) + (*pmDispatch->cstateMachineIdle)(napCtl); + else { + sti(); + } +} + +static pmStats_t * +pmsCPUStats(void) +{ + cpu_data_t *pp; + + pp = current_cpu_datap(); + return(&pp->cpu_pmStats); +} + +static pmsd * +pmsCPUStepperData(void) +{ + cpu_data_t *pp; + + pp = current_cpu_datap(); + return(&pp->pms); +} + +static uint64_t * +CPUHPETAddr(void) +{ + cpu_data_t *pp; + pp = current_cpu_datap(); + return(pp->cpu_pmHpet); +} + +/* + * Called by the power management kext to register itself and to get the + * callbacks it might need into other power management functions. + */ +void +pmRegister(pmDispatch_t *cpuFuncs, pmCallBacks_t *callbacks) +{ + if (callbacks != NULL) { + callbacks->Park = pmsPark; + callbacks->Run = pmsRun; + callbacks->RunLocal = pmsRunLocal; + callbacks->SetStep = pmsSetStep; + callbacks->NapPolicy = machine_nap_policy; + callbacks->Build = pmsBuild; + callbacks->Stats = pmsCPUStats; + callbacks->StepperData = pmsCPUStepperData; + callbacks->HPETAddr = CPUHPETAddr; + callbacks->InitState = &pmInitState; + } + + if (cpuFuncs != NULL) + pmDispatch = cpuFuncs; +} + +/* + * Unregisters the power management functions from the kext. 
+ */ +void +pmUnRegister(pmDispatch_t *cpuFuncs) +{ + if (cpuFuncs != NULL && pmDispatch == cpuFuncs) + pmDispatch = NULL; +} + +#if MACH_KDB +/* + * XXX stubs for now + */ +void +db_cfg(__unused db_expr_t addr, + __unused int have_addr, + __unused db_expr_t count, + __unused char *modif) +{ + return; +} + +void +db_display_iokit(__unused db_expr_t addr, + __unused int have_addr, + __unused db_expr_t count, + __unused char *modif) +{ + return; +} + +void +db_dtimers(__unused db_expr_t addr, + __unused int have_addr, + __unused db_expr_t count, + __unused char *modif) +{ + return; +} + +void +db_intcnt(__unused db_expr_t addr, + __unused int have_addr, + __unused db_expr_t count, + __unused char *modif) +{ + return; +} + +void +db_nap(__unused db_expr_t addr, + __unused int have_addr, + __unused db_expr_t count, + __unused char *modif) +{ + return; +} + +void +db_pmgr(__unused db_expr_t addr, + __unused int have_addr, + __unused db_expr_t count, + __unused char *modif) +{ + return; +} + +void +db_test(__unused db_expr_t addr, + __unused int have_addr, + __unused db_expr_t count, + __unused char *modif) +{ + return; +} + +void +db_getpmgr(__unused pmData_t *pmj) +{ +} +#endif diff --git a/osfmk/i386/pmCPU.h b/osfmk/i386/pmCPU.h new file mode 100644 index 000000000..b107551fe --- /dev/null +++ b/osfmk/i386/pmCPU.h @@ -0,0 +1,140 @@ +/* + * Copyright (c) 2006 Apple Computer, Inc. All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * The contents of this file constitute Original Code as defined in and + * are subject to the Apple Public Source License Version 1.1 (the + * "License"). You may not use this file except in compliance with the + * License. Please obtain a copy of the License at + * http://www.apple.com/publicsource and read it before using this file. 
+ * + * This Original Code and all software distributed under the License are + * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the + * License for the specific language governing rights and limitations + * under the License. + * + * @APPLE_LICENSE_HEADER_END@ + */ +#ifdef KERNEL_PRIVATE +#ifndef _I386_PMCPU_H_ +#define _I386_PMCPU_H_ + +#include + +#ifndef ASSEMBLER + +typedef enum { C1, C2, C3, C4, Hlt, C3Res, All, Cnum } pm_Cstate_t; +typedef struct pmStats { + uint64_t pmNapCnt[Cnum]; /* Total nap calls for states */ + uint64_t pmNapTime[Cnum]; /* Total nap time for states */ + uint64_t pmNapC2HPET; /* Total nap time for C2 using HPET for stats */ + uint64_t pmNapC4HPET; /* Total nap time for C4 using HPET for stats */ + uint64_t pmNapHPETPops; /* Number of times we detect HPET popping */ + uint64_t pmHPETRupt; /* Number of HPET interruptions */ + uint32_t pmCurC3Res; /* Current value of the C3 residency timer */ + uint32_t pmLastApic; /* Last value of apic timer */ + uint32_t pmNewApic; /* New value of apic timer */ + uint64_t pmHpetTim; /* Time to next interrupt in HPET ticks */ + uint64_t pmHpetCmp; /* HPET comparator */ + uint64_t pmHpetCfg; /* HPET configuration */ + uint64_t pmLSNs; /* (TEST) Last set nanotime */ + uint64_t pmLLHpet; /* (TEST) Last loaded HPET */ +} pmStats_t; + +#define MAX_PSTATES 32 /* architectural limit */ + +typedef enum { Cn1, Cn2, Cn3, Cn4, Cnmax } Cstate_number_t; +typedef struct { + Cstate_number_t number; + uint32_t hint; +} Cstate_hint_t; + + +struct pmData { + uint8_t pad[93]; +}; +typedef struct pmData pmData_t; + +#define pmNapHalt 0x00000010 +#define pmNapC1 0x00000008 +#define pmNapC2 0x00000004 +#define pmNapC3 0x00000002 +#define pmNapC4 0x00000001 +#define pmNapMask 0x000000FF + +#define 
cfgAdr 0xCF8 +#define cfgDat 0xCFC +#define lpcCfg (0x80000000 | (0 << 16) | (31 << 11) | (0 << 8)) + +/* + * Dispatch table for functions that get installed when the power + * management KEXT loads. + */ +typedef struct +{ + /* + * The following are the stepper table interfaces. + */ + void (*pmsCPUMachineInit)(void); + void (*pmsCPUInit)(void); + void (*pmsCPUSet)(uint32_t sel); + void (*pmsCPUConf)(void); + void (*pmsCPURun)(uint32_t nstep); + uint32_t (*pmsCPUQuery)(void); + uint32_t (*pmsCPUPackageQuery)(void); + void (*pmsCPUYellowFlag)(void); + void (*pmsCPUGreenFlag)(void); + kern_return_t (*pmsCPULoadVIDTable)(uint16_t *tablep, int nstates); + kern_return_t (*pmsCPUSetPStateLimit)(uint32_t limit); + + /* + * The following are the 'C' State interfaces. + */ + void (*cstateInit)(void); + void (*cstateMachineIdle)(uint32_t napCtl); + kern_return_t (*cstateTableSet)(Cstate_hint_t *tablep, unsigned int nstates); + uint32_t (*cstateNapPolicy)(uint32_t forcenap, uint32_t napCtl); +} pmDispatch_t; + +typedef struct { + uint32_t PState; + uint32_t PLimit; + uint16_t VIDTable[MAX_PSTATES]; + uint32_t VIDTableCount; + Cstate_hint_t CStates[Cnmax]; + uint32_t CStatesCount; +} pmInitState_t; + +typedef struct { + void (*Park)(void); + void (*Run)(uint32_t nstep); + void (*RunLocal)(uint32_t nstep); + void (*SetStep)(uint32_t nstep, int dir); + void (*NapPolicy)(void); + kern_return_t (*Build)(pmsDef *pd, uint32_t pdsize, pmsSetFunc_t *functab, uint32_t platformData, pmsQueryFunc_t queryFunc); + pmStats_t *(*Stats)(void); + pmsd *(*StepperData)(void); + uint64_t *(*HPETAddr)(void); + pmInitState_t *InitState; +} pmCallBacks_t; + +extern pmDispatch_t *pmDispatch; + +extern uint32_t maxBusDelay; +extern uint32_t C4C2SnoopDelay; +extern uint32_t forcenap; + +void power_management_init(void); +void machine_nap_policy(void); +kern_return_t Cstate_table_set(Cstate_hint_t *tablep, unsigned int nstates); +void machine_idle_cstate(void); +void pmRegister(pmDispatch_t *cpuFuncs, 
pmCallBacks_t *callbacks); +void pmUnRegister(pmDispatch_t *cpuFuncs); + +#endif /* ASSEMBLER */ +#endif /* _I386_PMCPU_H_ */ +#endif /* KERNEL_PRIVATE */ diff --git a/osfmk/i386/pmap.c b/osfmk/i386/pmap.c index e43dac464..b6a6372e0 100644 --- a/osfmk/i386/pmap.c +++ b/osfmk/i386/pmap.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2006 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -116,6 +116,8 @@ #include #include #include +#include +#include #if MACH_KDB #include @@ -129,17 +131,35 @@ #include #include +#include + +#include + +#ifdef IWANTTODEBUG +#undef DEBUG +#define DEBUG 1 +#define POSTCODE_DELAY 1 +#include +#endif /* IWANTTODEBUG */ /* * Forward declarations for internal functions. */ +void pmap_expand_pml4( + pmap_t map, + vm_map_offset_t v); + +void pmap_expand_pdpt( + pmap_t map, + vm_map_offset_t v); + void pmap_expand( pmap_t map, - vm_offset_t v); + vm_map_offset_t v); -extern void pmap_remove_range( +static void pmap_remove_range( pmap_t pmap, - vm_offset_t va, + vm_map_offset_t va, pt_entry_t *spte, pt_entry_t *epte); @@ -155,9 +175,6 @@ void phys_attribute_set( ppnum_t phys, int bits); -void pmap_growkernel( - vm_offset_t addr); - void pmap_set_reference( ppnum_t pn); @@ -166,24 +183,21 @@ void pmap_movepage( unsigned long to, vm_size_t size); -pt_entry_t * pmap_mapgetpte( - vm_map_t map, - vm_offset_t v); - boolean_t phys_page_exists( ppnum_t pn); -#ifndef set_dirbase -void set_dirbase(vm_offset_t dirbase); -#endif /* set_dirbase */ +#ifdef PMAP_DEBUG +void dump_pmap(pmap_t); +void dump_4GB_pdpt(pmap_t p); +void dump_4GB_pdpt_thread(thread_t tp); +#endif #define iswired(pte) ((pte) & INTEL_PTE_WIRED) -#define WRITE_PTE(pte_p, pte_entry) *(pte_p) = (pte_entry); -#define WRITE_PTE_FAST(pte_p, pte_entry) *(pte_p) = (pte_entry); +int nx_enabled = 1; /* enable no-execute protection */ + +int cpu_64bit = 0; -#define value_64bit(value) ((value) & 
0xFFFFFFFF00000000LL) -#define low32(x) ((unsigned int)((x) & 0x00000000ffffffffLL)) /* * Private data structures. @@ -198,7 +212,7 @@ void set_dirbase(vm_offset_t dirbase); typedef struct pv_entry { struct pv_entry *next; /* next pv_entry */ pmap_t pmap; /* pmap where mapping lies */ - vm_offset_t va; /* virtual address for mapping */ + vm_map_offset_t va; /* virtual address for mapping */ } *pv_entry_t; #define PV_ENTRY_NULL ((pv_entry_t) 0) @@ -225,7 +239,7 @@ int mappingrecurse = 0; pv_free_list = pv_e->next; \ pv_free_count--; \ if (pv_free_count < PV_LOW_WATER_MARK) \ - if (hw_compare_and_store(0,1,&mappingrecurse)) \ + if (hw_compare_and_store(0,1,(u_int *)&mappingrecurse)) \ thread_call_enter(mapping_adjust_call); \ } \ simple_unlock(&pv_free_list_lock); \ @@ -241,10 +255,7 @@ int mappingrecurse = 0; zone_t pv_list_zone; /* zone of pv_entry structures */ -#ifdef PAE static zone_t pdpt_zone; -#endif - /* * Each entry in the pv_head_table is locked by a bit in the @@ -264,14 +275,8 @@ pmap_paddr_t vm_first_phys = (pmap_paddr_t) 0; pmap_paddr_t vm_last_phys = (pmap_paddr_t) 0; boolean_t pmap_initialized = FALSE;/* Has pmap_init completed? */ -pmap_paddr_t kernel_vm_end = (pmap_paddr_t)0; - -#define GROW_KERNEL_FUNCTION_IMPLEMENTED 1 -#if GROW_KERNEL_FUNCTION_IMPLEMENTED /* not needed until growing kernel pmap */ static struct vm_object kptobj_object_store; static vm_object_t kptobj; -#endif - /* * Index into pv_head table, its lock bits, and the modify/reference @@ -302,6 +307,7 @@ char *pmap_phys_attributes; * page-directory entry. */ #define PDE_MAPPED_SIZE (pdetova(1)) +uint64_t pde_mapped_size; /* * Locking and TLB invalidation @@ -342,23 +348,21 @@ char *pmap_phys_attributes; /* * We raise the interrupt level to splvm, to block interprocessor - * interrupts during pmap operations. We must take the CPU out of - * the cpus_active set while interrupts are blocked. + * interrupts during pmap operations. 
We mark the cpu's cr3 inactive + * while interrupts are blocked. */ -#define SPLVM(spl) { \ - spl = splhigh(); \ - mp_disable_preemption(); \ - i_bit_clear(cpu_number(), &cpus_active); \ - mp_enable_preemption(); \ +#define SPLVM(spl) { \ + spl = splhigh(); \ + CPU_CR3_MARK_INACTIVE(); \ } -#define SPLX(spl) { \ - mp_disable_preemption(); \ - i_bit_set(cpu_number(), &cpus_active); \ - mp_enable_preemption(); \ - splx(spl); \ +#define SPLX(spl) { \ + if (current_cpu_datap()->cpu_tlb_invalid) \ + process_pmap_updates(); \ + CPU_CR3_MARK_ACTIVE(); \ + splx(spl); \ } - + /* * Lock on pmap system */ @@ -405,8 +409,8 @@ extern int disableSerialOuput; #define LOOP_CHECK(msg, pmap) \ if (--loop_count == 0) { \ mp_disable_preemption(); \ - kprintf("%s: cpu %d pmap %x, cpus_active 0x%x\n", \ - msg, cpu_number(), pmap, cpus_active); \ + kprintf("%s: cpu %d pmap %x\n", \ + msg, cpu_number(), pmap); \ Debugger("deadlock detection"); \ mp_enable_preemption(); \ loop_count = max_lock_loops; \ @@ -416,76 +420,15 @@ extern int disableSerialOuput; #define LOOP_CHECK(msg, pmap) #endif /* USLOCK_DEBUG */ -#define PMAP_UPDATE_TLBS(pmap, s, e) \ -{ \ - cpu_set cpu_mask; \ - cpu_set users; \ - \ - mp_disable_preemption(); \ - cpu_mask = 1 << cpu_number(); \ - \ - /* Since the pmap is locked, other updates are locked */ \ - /* out, and any pmap_activate has finished. 
*/ \ - \ - /* find other cpus using the pmap */ \ - users = (pmap)->cpus_using & ~cpu_mask; \ - if (users) { \ - LOOP_VAR; \ - /* signal them, and wait for them to finish */ \ - /* using the pmap */ \ - signal_cpus(users, (pmap), (s), (e)); \ - while (((pmap)->cpus_using & cpus_active & ~cpu_mask)) { \ - LOOP_CHECK("PMAP_UPDATE_TLBS", pmap); \ - cpu_pause(); \ - } \ - } \ - /* invalidate our own TLB if pmap is in use */ \ - \ - if ((pmap)->cpus_using & cpu_mask) { \ - INVALIDATE_TLB((pmap), (s), (e)); \ - } \ - \ - mp_enable_preemption(); \ -} - -#define MAX_TBIS_SIZE 32 /* > this -> TBIA */ /* XXX */ - -#define INVALIDATE_TLB(m, s, e) { \ - flush_tlb(); \ -} - -/* - * Structures to keep track of pending TLB invalidations - */ -cpu_set cpus_active; -cpu_set cpus_idle; -#define UPDATE_LIST_SIZE 4 +static void pmap_flush_tlbs(pmap_t pmap); -struct pmap_update_item { - pmap_t pmap; /* pmap to invalidate */ - vm_offset_t start; /* start address to invalidate */ - vm_offset_t end; /* end address to invalidate */ -}; +#define PMAP_UPDATE_TLBS(pmap, s, e) \ + pmap_flush_tlbs(pmap) -typedef struct pmap_update_item *pmap_update_item_t; -/* - * List of pmap updates. If the list overflows, - * the last entry is changed to invalidate all. - */ -struct pmap_update_list { - decl_simple_lock_data(,lock) - int count; - struct pmap_update_item item[UPDATE_LIST_SIZE]; -} ; -typedef struct pmap_update_list *pmap_update_list_t; +#define MAX_TBIS_SIZE 32 /* > this -> TBIA */ /* XXX */ -extern void signal_cpus( - cpu_set use_list, - pmap_t pmap, - vm_offset_t start, - vm_offset_t end); pmap_memory_region_t pmap_memory_regions[PMAP_MEMORY_REGIONS_SIZE]; @@ -493,14 +436,12 @@ pmap_memory_region_t pmap_memory_regions[PMAP_MEMORY_REGIONS_SIZE]; * Other useful macros. 
*/ #define current_pmap() (vm_map_pmap(current_thread()->map)) -#define pmap_in_use(pmap, cpu) (((pmap)->cpus_using & (1 << (cpu))) != 0) struct pmap kernel_pmap_store; pmap_t kernel_pmap; -#ifdef PMAP_QUEUE -decl_simple_lock_data(,free_pmap_lock) -#endif +pd_entry_t high_shared_pde; +pd_entry_t commpage64_pde; struct zone *pmap_zone; /* zone of pmap structures */ @@ -508,6 +449,9 @@ int pmap_debug = 0; /* flag for debugging prints */ unsigned int inuse_ptepages_count = 0; /* debugging */ +addr64_t kernel64_cr3; +boolean_t no_shared_cr3 = FALSE; /* -no_shared_cr3 boot arg */ + /* * Pmap cache. Cache is threaded through ref_count field of pmap. * Max will eventually be constant -- variable for experimentation. @@ -518,11 +462,11 @@ pmap_t pmap_cache_list; int pmap_cache_count; decl_simple_lock_data(,pmap_cache_lock) -extern vm_offset_t hole_start, hole_end; - extern char end; static int nkpt; +extern uint32_t lowGlo; +extern void *version; pt_entry_t *DMAP1, *DMAP2; caddr_t DADDR1; @@ -533,7 +477,7 @@ caddr_t DADDR2; struct pmap_alias { vm_offset_t rpc; pmap_t pmap; - vm_offset_t va; + vm_map_offset_t va; int cookie; #define PMAP_ALIAS_COOKIE 0xdeadbeef } pmap_aliasbuf[PMAP_ALIAS_MAX]; @@ -542,73 +486,183 @@ extern vm_offset_t get_rpc(); #endif /* DEBUG_ALIAS */ -#define pmap_pde(m, v) (&((m)->dirbase[(vm_offset_t)(v) >> PDESHIFT])) -#define pdir_pde(d, v) (d[(vm_offset_t)(v) >> PDESHIFT]) - -static __inline int -pmap_is_current(pmap_t pmap) +/* + * for legacy, returns the address of the pde entry. 
+ * for 64 bit, causes the pdpt page containing the pde entry to be mapped, + * then returns the mapped address of the pde entry in that page + */ +pd_entry_t * +pmap_pde(pmap_t m, vm_map_offset_t v) { - return (pmap == kernel_pmap || - (pmap->dirbase[PTDPTDI] & PG_FRAME) == (PTDpde[0] & PG_FRAME)); + pd_entry_t *pde; + if (!cpu_64bit || (m == kernel_pmap)) { + pde = (&((m)->dirbase[(vm_offset_t)(v) >> PDESHIFT])); + } else { + assert(m); + assert(ml_get_interrupts_enabled() == 0 || get_preemption_level() != 0); + pde = pmap64_pde(m, v); + } + return pde; } /* - * return address of mapped pte for vaddr va in pmap pmap. + * the single pml4 page per pmap is allocated at pmap create time and exists + * for the duration of the pmap. we allocate this page in kernel vm (to save us one + * level of page table dynamic mapping. + * this returns the address of the requested pml4 entry in the top level page. */ -pt_entry_t * -pmap_pte(pmap_t pmap, vm_offset_t va) -{ - pd_entry_t *pde; - pd_entry_t newpf; - - pde = pmap_pde(pmap, va); - if (*pde != 0) { - if (pmap_is_current(pmap)) - return( vtopte(va)); - newpf = *pde & PG_FRAME; - if (((*CM4) & PG_FRAME) != newpf) { - *CM4 = newpf | INTEL_PTE_RW | INTEL_PTE_VALID; - invlpg((u_int)CA4); - } - return (pt_entry_t *)CA4 + (i386_btop(va) & (NPTEPG-1)); +static inline +pml4_entry_t * +pmap64_pml4(pmap_t pmap, vm_map_offset_t vaddr) +{ + return ((pml4_entry_t *)pmap->pm_hold + ((vm_offset_t)((vaddr>>PML4SHIFT)&(NPML4PG-1)))); +} + +/* + * maps in the pml4 page, if any, containing the pdpt entry requested + * and returns the address of the pdpt entry in that mapped page + */ +pdpt_entry_t * +pmap64_pdpt(pmap_t pmap, vm_map_offset_t vaddr) +{ + pml4_entry_t newpf; + pml4_entry_t *pml4; + int i; + + assert(pmap); + assert(ml_get_interrupts_enabled() == 0 || get_preemption_level() != 0); + if ((vaddr > 0x00007FFFFFFFFFFFULL) && (vaddr < 0xFFFF800000000000ULL)) { + return(0); } - return(0); + + pml4 = pmap64_pml4(pmap, vaddr); + + if 
(pml4 && ((*pml4 & INTEL_PTE_VALID))) { + + newpf = *pml4 & PG_FRAME; + + + for (i=PMAP_PDPT_FIRST_WINDOW; i < PMAP_PDPT_FIRST_WINDOW+PMAP_PDPT_NWINDOWS; i++) { + if (((*(current_cpu_datap()->cpu_pmap->mapwindow[i].prv_CMAP)) & PG_FRAME) == newpf) { + return((pdpt_entry_t *)(current_cpu_datap()->cpu_pmap->mapwindow[i].prv_CADDR) + + ((vm_offset_t)((vaddr>>PDPTSHIFT)&(NPDPTPG-1)))); + } + } + + current_cpu_datap()->cpu_pmap->pdpt_window_index++; + if (current_cpu_datap()->cpu_pmap->pdpt_window_index > (PMAP_PDPT_FIRST_WINDOW+PMAP_PDPT_NWINDOWS-1)) + current_cpu_datap()->cpu_pmap->pdpt_window_index = PMAP_PDPT_FIRST_WINDOW; + pmap_store_pte( + (current_cpu_datap()->cpu_pmap->mapwindow[current_cpu_datap()->cpu_pmap->pdpt_window_index].prv_CMAP), + newpf | INTEL_PTE_RW | INTEL_PTE_VALID); + invlpg((u_int)(current_cpu_datap()->cpu_pmap->mapwindow[current_cpu_datap()->cpu_pmap->pdpt_window_index].prv_CADDR)); + return ((pdpt_entry_t *)(current_cpu_datap()->cpu_pmap->mapwindow[current_cpu_datap()->cpu_pmap->pdpt_window_index].prv_CADDR) + + ((vm_offset_t)((vaddr>>PDPTSHIFT)&(NPDPTPG-1)))); + } + + return (0); } - -#define DEBUG_PTE_PAGE 0 -#if DEBUG_PTE_PAGE -void -ptep_check( - ptep_t ptep) +/* + * maps in the pdpt page, if any, containing the pde entry requested + * and returns the address of the pde entry in that mapped page + */ +pd_entry_t * +pmap64_pde(pmap_t pmap, vm_map_offset_t vaddr) { - register pt_entry_t *pte, *epte; - int ctu, ctw; + pdpt_entry_t newpf; + pdpt_entry_t *pdpt; + int i; - /* check the use and wired counts */ - if (ptep == PTE_PAGE_NULL) - return; - pte = pmap_pte(ptep->pmap, ptep->va); - epte = pte + INTEL_PGBYTES/sizeof(pt_entry_t); - ctu = 0; - ctw = 0; - while (pte < epte) { - if (pte->pfn != 0) { - ctu++; - if (pte->wired) - ctw++; + assert(pmap); + assert(ml_get_interrupts_enabled() == 0 || get_preemption_level() != 0); + if ((vaddr > 0x00007FFFFFFFFFFFULL) && (vaddr < 0xFFFF800000000000ULL)) { + return(0); + } + + /* if (vaddr & (1ULL << 
63)) panic("neg addr");*/ + pdpt = pmap64_pdpt(pmap, vaddr); + + if (pdpt && ((*pdpt & INTEL_PTE_VALID))) { + + newpf = *pdpt & PG_FRAME; + + for (i=PMAP_PDE_FIRST_WINDOW; i < PMAP_PDE_FIRST_WINDOW+PMAP_PDE_NWINDOWS; i++) { + if (((*(current_cpu_datap()->cpu_pmap->mapwindow[i].prv_CMAP)) & PG_FRAME) == newpf) { + return((pd_entry_t *)(current_cpu_datap()->cpu_pmap->mapwindow[i].prv_CADDR) + + ((vm_offset_t)((vaddr>>PDSHIFT)&(NPDPG-1)))); + } } - pte++; + + current_cpu_datap()->cpu_pmap->pde_window_index++; + if (current_cpu_datap()->cpu_pmap->pde_window_index > (PMAP_PDE_FIRST_WINDOW+PMAP_PDE_NWINDOWS-1)) + current_cpu_datap()->cpu_pmap->pde_window_index = PMAP_PDE_FIRST_WINDOW; + pmap_store_pte( + (current_cpu_datap()->cpu_pmap->mapwindow[current_cpu_datap()->cpu_pmap->pde_window_index].prv_CMAP), + newpf | INTEL_PTE_RW | INTEL_PTE_VALID); + invlpg((u_int)(current_cpu_datap()->cpu_pmap->mapwindow[current_cpu_datap()->cpu_pmap->pde_window_index].prv_CADDR)); + return ((pd_entry_t *)(current_cpu_datap()->cpu_pmap->mapwindow[current_cpu_datap()->cpu_pmap->pde_window_index].prv_CADDR) + + ((vm_offset_t)((vaddr>>PDSHIFT)&(NPDPG-1)))); } - if (ctu != ptep->use_count || ctw != ptep->wired_count) { - printf("use %d wired %d - actual use %d wired %d\n", - ptep->use_count, ptep->wired_count, ctu, ctw); - panic("pte count"); + return (0); +} + + + +/* + * return address of mapped pte for vaddr va in pmap pmap. + * must be called with pre-emption or interrupts disabled + * if targeted pmap is not the kernel pmap + * since we may be passing back a virtual address that is + * associated with this cpu... pre-emption or interrupts + * must remain disabled until the caller is done using + * the pointer that was passed back . 
+ * + * maps the pde page, if any, containing the pte in and returns + * the address of the pte in that mapped page + */ +pt_entry_t * +pmap_pte(pmap_t pmap, vm_map_offset_t vaddr) +{ + pd_entry_t *pde; + pd_entry_t newpf; + int i; + + assert(pmap); + pde = pmap_pde(pmap,vaddr); + + if (pde && ((*pde & INTEL_PTE_VALID))) { + if (pmap == kernel_pmap) { + return (vtopte(vaddr)); /* compat kernel still has pte's mapped */ + } + + assert(ml_get_interrupts_enabled() == 0 || get_preemption_level() != 0); + + newpf = *pde & PG_FRAME; + + for (i=PMAP_PTE_FIRST_WINDOW; i < PMAP_PTE_FIRST_WINDOW+PMAP_PTE_NWINDOWS; i++) { + if (((*(current_cpu_datap()->cpu_pmap->mapwindow[i].prv_CMAP)) & PG_FRAME) == newpf) { + return((pt_entry_t *)(current_cpu_datap()->cpu_pmap->mapwindow[i].prv_CADDR) + + ((vm_offset_t)i386_btop(vaddr) & (NPTEPG-1))); + } + } + + current_cpu_datap()->cpu_pmap->pte_window_index++; + if (current_cpu_datap()->cpu_pmap->pte_window_index > (PMAP_PTE_FIRST_WINDOW+PMAP_PTE_NWINDOWS-1)) + current_cpu_datap()->cpu_pmap->pte_window_index = PMAP_PTE_FIRST_WINDOW; + pmap_store_pte( + (current_cpu_datap()->cpu_pmap->mapwindow[current_cpu_datap()->cpu_pmap->pte_window_index].prv_CMAP), + newpf | INTEL_PTE_RW | INTEL_PTE_VALID); + invlpg((u_int)(current_cpu_datap()->cpu_pmap->mapwindow[current_cpu_datap()->cpu_pmap->pte_window_index].prv_CADDR)); + return ((pt_entry_t *)(current_cpu_datap()->cpu_pmap->mapwindow[current_cpu_datap()->cpu_pmap->pte_window_index].prv_CADDR) + + ((vm_offset_t)i386_btop(vaddr) & (NPTEPG-1))); } + + return(0); } -#endif /* DEBUG_PTE_PAGE */ + /* * Map memory at initialization. 
The physical addresses being @@ -619,17 +673,18 @@ ptep_check( */ vm_offset_t pmap_map( - register vm_offset_t virt, - register vm_offset_t start_addr, - register vm_offset_t end_addr, - register vm_prot_t prot) + vm_offset_t virt, + vm_map_offset_t start_addr, + vm_map_offset_t end_addr, + vm_prot_t prot, + unsigned int flags) { - register int ps; + int ps; ps = PAGE_SIZE; while (start_addr < end_addr) { - pmap_enter(kernel_pmap, - virt, (ppnum_t) i386_btop(start_addr), prot, 0, FALSE); + pmap_enter(kernel_pmap, (vm_map_offset_t)virt, + (ppnum_t) i386_btop(start_addr), prot, flags, FALSE); virt += ps; start_addr += ps; } @@ -645,30 +700,36 @@ pmap_map( */ vm_offset_t pmap_map_bd( - register vm_offset_t virt, - register vm_offset_t start_addr, - register vm_offset_t end_addr, - vm_prot_t prot) + vm_offset_t virt, + vm_map_offset_t start_addr, + vm_map_offset_t end_addr, + vm_prot_t prot, + unsigned int flags) { - register pt_entry_t template; - register pt_entry_t *pte; + pt_entry_t template; + pt_entry_t *pte; template = pa_to_pte(start_addr) - | INTEL_PTE_NCACHE | INTEL_PTE_REF | INTEL_PTE_MOD | INTEL_PTE_WIRED | INTEL_PTE_VALID; + + if(flags & (VM_MEM_NOT_CACHEABLE | VM_WIMG_USE_DEFAULT)) { + template |= INTEL_PTE_NCACHE; + if(!(flags & (VM_MEM_GUARDED | VM_WIMG_USE_DEFAULT))) + template |= INTEL_PTE_PTA; + } + if (prot & VM_PROT_WRITE) template |= INTEL_PTE_WRITE; - /* XXX move pmap_pte out of loop, once one pte mapped, all are */ while (start_addr < end_addr) { - pte = pmap_pte(kernel_pmap, virt); + pte = pmap_pte(kernel_pmap, (vm_map_offset_t)virt); if (pte == PT_ENTRY_NULL) { panic("pmap_map_bd: Invalid kernel address\n"); } - WRITE_PTE_FAST(pte, template) + pmap_store_pte(pte, template); pte_increment_pa(template); virt += PAGE_SIZE; start_addr += PAGE_SIZE; @@ -685,6 +746,142 @@ extern vm_offset_t etext; extern void *sectHIBB; extern int sectSizeHIB; + +vm_offset_t +pmap_high_shared_remap(enum high_fixed_addresses e, vm_offset_t va, int sz) +{ + 
vm_offset_t ve = pmap_index_to_virt(e); + pt_entry_t *ptep; + pmap_paddr_t pa; + int i; + + assert(0 == (va & PAGE_MASK)); /* expecting page aligned */ + ptep = pmap_pte(kernel_pmap, (vm_map_offset_t)ve); + + for (i=0; i< sz; i++) { + pa = (pmap_paddr_t) kvtophys(va); + pmap_store_pte(ptep, (pa & PG_FRAME) + | INTEL_PTE_VALID + | INTEL_PTE_GLOBAL + | INTEL_PTE_RW + | INTEL_PTE_REF + | INTEL_PTE_MOD); + va+= PAGE_SIZE; + ptep++; + } + return ve; +} + +vm_offset_t +pmap_cpu_high_shared_remap(int cpu, enum high_cpu_types e, vm_offset_t va, int sz) +{ + enum high_fixed_addresses a = e + HIGH_CPU_END * cpu; + return pmap_high_shared_remap(HIGH_FIXED_CPUS_BEGIN + a, va, sz); +} + +void pmap_init_high_shared(void); + +extern vm_offset_t gdtptr, idtptr; + +extern uint32_t low_intstack; + +extern struct fake_descriptor ldt_desc_pattern; +extern struct fake_descriptor tss_desc_pattern; + +extern char hi_remap_text, hi_remap_etext; +extern char t_zero_div; + +pt_entry_t *pte_unique_base; + +void +pmap_init_high_shared(void) +{ + + vm_offset_t haddr; + struct __gdt_desc_struct gdt_desc = {0,0,0}; + struct __idt_desc_struct idt_desc = {0,0,0}; +#if MACH_KDB + struct i386_tss *ttss; +#endif + + kprintf("HIGH_MEM_BASE 0x%x fixed per-cpu begin 0x%x\n", + HIGH_MEM_BASE,pmap_index_to_virt(HIGH_FIXED_CPUS_BEGIN)); + pte_unique_base = pmap_pte(kernel_pmap, (vm_map_offset_t)pmap_index_to_virt(HIGH_FIXED_CPUS_BEGIN)); + + if (i386_btop(&hi_remap_etext - &hi_remap_text + 1) > + HIGH_FIXED_TRAMPS_END - HIGH_FIXED_TRAMPS + 1) + panic("tramps too large"); + haddr = pmap_high_shared_remap(HIGH_FIXED_TRAMPS, + (vm_offset_t) &hi_remap_text, 3); + kprintf("tramp: 0x%x, ",haddr); + printf("hi mem tramps at 0x%x\n",haddr); + /* map gdt up high and update ptr for reload */ + haddr = pmap_high_shared_remap(HIGH_FIXED_GDT, + (vm_offset_t) master_gdt, 1); + __asm__ __volatile__("sgdt %0": "=m" (gdt_desc): :"memory"); + gdt_desc.address = haddr; + kprintf("GDT: 0x%x, ",haddr); + /* map ldt up high */ 
+ haddr = pmap_high_shared_remap(HIGH_FIXED_LDT_BEGIN, + (vm_offset_t) master_ldt, + HIGH_FIXED_LDT_END - HIGH_FIXED_LDT_BEGIN + 1); + kprintf("LDT: 0x%x, ",haddr); + /* put new ldt addr into gdt */ + master_gdt[sel_idx(KERNEL_LDT)] = ldt_desc_pattern; + master_gdt[sel_idx(KERNEL_LDT)].offset = (vm_offset_t) haddr; + fix_desc(&master_gdt[sel_idx(KERNEL_LDT)], 1); + master_gdt[sel_idx(USER_LDT)] = ldt_desc_pattern; + master_gdt[sel_idx(USER_LDT)].offset = (vm_offset_t) haddr; + fix_desc(&master_gdt[sel_idx(USER_LDT)], 1); + + /* map idt up high */ + haddr = pmap_high_shared_remap(HIGH_FIXED_IDT, + (vm_offset_t) master_idt, 1); + __asm__ __volatile__("sidt %0" : "=m" (idt_desc)); + idt_desc.address = haddr; + kprintf("IDT: 0x%x, ", haddr); + /* remap ktss up high and put new high addr into gdt */ + haddr = pmap_high_shared_remap(HIGH_FIXED_KTSS, + (vm_offset_t) &master_ktss, 1); + master_gdt[sel_idx(KERNEL_TSS)] = tss_desc_pattern; + master_gdt[sel_idx(KERNEL_TSS)].offset = (vm_offset_t) haddr; + fix_desc(&master_gdt[sel_idx(KERNEL_TSS)], 1); + kprintf("KTSS: 0x%x, ",haddr); +#if MACH_KDB + /* remap dbtss up high and put new high addr into gdt */ + haddr = pmap_high_shared_remap(HIGH_FIXED_DBTSS, + (vm_offset_t) &master_dbtss, 1); + master_gdt[sel_idx(DEBUG_TSS)] = tss_desc_pattern; + master_gdt[sel_idx(DEBUG_TSS)].offset = (vm_offset_t) haddr; + fix_desc(&master_gdt[sel_idx(DEBUG_TSS)], 1); + ttss = (struct i386_tss *)haddr; + kprintf("DBTSS: 0x%x, ",haddr); +#endif /* MACH_KDB */ + + /* remap dftss up high and put new high addr into gdt */ + haddr = pmap_high_shared_remap(HIGH_FIXED_DFTSS, + (vm_offset_t) &master_dftss, 1); + master_gdt[sel_idx(DF_TSS)] = tss_desc_pattern; + master_gdt[sel_idx(DF_TSS)].offset = (vm_offset_t) haddr; + fix_desc(&master_gdt[sel_idx(DF_TSS)], 1); + kprintf("DFTSS: 0x%x\n",haddr); + + /* remap mctss up high and put new high addr into gdt */ + haddr = pmap_high_shared_remap(HIGH_FIXED_DFTSS, + (vm_offset_t) &master_mctss, 1); + 
master_gdt[sel_idx(MC_TSS)] = tss_desc_pattern; + master_gdt[sel_idx(MC_TSS)].offset = (vm_offset_t) haddr; + fix_desc(&master_gdt[sel_idx(MC_TSS)], 1); + kprintf("MCTSS: 0x%x\n",haddr); + + __asm__ __volatile__("lgdt %0": "=m" (gdt_desc)); + __asm__ __volatile__("lidt %0": "=m" (idt_desc)); + kprintf("gdt/idt reloaded, "); + set_tr(KERNEL_TSS); + kprintf("tr reset to KERNEL_TSS\n"); +} + + /* * Bootstrap the system enough to run with virtual memory. * Map the kernel's code and data, and allocate the system page table. @@ -706,50 +903,60 @@ extern int sectSizeHIB; void pmap_bootstrap( - __unused vm_offset_t load_start) + __unused vm_offset_t load_start, + boolean_t IA32e) { vm_offset_t va; pt_entry_t *pte; int i; int wpkernel, boot_arg; + pdpt_entry_t *pdpt; vm_last_addr = VM_MAX_KERNEL_ADDRESS; /* Set the highest address * known to VM */ - /* * The kernel's pmap is statically allocated so we don't * have to use pmap_create, which is unlikely to work * correctly at this part of the boot sequence. 
*/ + kernel_pmap = &kernel_pmap_store; -#ifdef PMAP_QUEUE - kernel_pmap->pmap_link.next = (queue_t)kernel_pmap; /* Set up anchor forward */ - kernel_pmap->pmap_link.prev = (queue_t)kernel_pmap; /* Set up anchor reverse */ -#endif kernel_pmap->ref_count = 1; + kernel_pmap->nx_enabled = FALSE; + kernel_pmap->pm_64bit = 0; kernel_pmap->pm_obj = (vm_object_t) NULL; kernel_pmap->dirbase = (pd_entry_t *)((unsigned int)IdlePTD | KERNBASE); - kernel_pmap->pdirbase = (pd_entry_t *)IdlePTD; -#ifdef PAE - kernel_pmap->pm_pdpt = (pd_entry_t *)((unsigned int)IdlePDPT | KERNBASE ); - kernel_pmap->pm_ppdpt = (vm_offset_t)IdlePDPT; -#endif + kernel_pmap->pdirbase = (pmap_paddr_t)((int)IdlePTD); + pdpt = (pd_entry_t *)((unsigned int)IdlePDPT | KERNBASE ); + kernel_pmap->pm_pdpt = pdpt; + kernel_pmap->pm_cr3 = (pmap_paddr_t)((int)IdlePDPT); va = (vm_offset_t)kernel_pmap->dirbase; /* setup self referential mapping(s) */ - for (i = 0; i< NPGPTD; i++ ) { + for (i = 0; i< NPGPTD; i++, pdpt++) { pmap_paddr_t pa; pa = (pmap_paddr_t) kvtophys(va + i386_ptob(i)); - * (pd_entry_t *) (kernel_pmap->dirbase + PTDPTDI + i) = + pmap_store_pte( + (pd_entry_t *) (kernel_pmap->dirbase + PTDPTDI + i), (pa & PG_FRAME) | INTEL_PTE_VALID | INTEL_PTE_RW | INTEL_PTE_REF | - INTEL_PTE_MOD | INTEL_PTE_WIRED ; -#ifdef PAE - kernel_pmap->pm_pdpt[i] = pa | INTEL_PTE_VALID; -#endif + INTEL_PTE_MOD | INTEL_PTE_WIRED) ; + pmap_store_pte(pdpt, pa | INTEL_PTE_VALID); } + cpu_64bit = IA32e; + + lo_kernel_cr3 = kernel_pmap->pm_cr3; + current_cpu_datap()->cpu_kernel_cr3 = (addr64_t) kernel_pmap->pm_cr3; + + /* save the value we stuff into created pmaps to share the gdts etc */ + high_shared_pde = *pmap_pde(kernel_pmap, HIGH_MEM_BASE); + /* make sure G bit is on for high shared pde entry */ + high_shared_pde |= INTEL_PTE_GLOBAL; + pmap_store_pte(pmap_pde(kernel_pmap, HIGH_MEM_BASE), high_shared_pde); + nkpt = NKPT; + inuse_ptepages_count += NKPT; virtual_avail = (vm_offset_t)VADDR(KPTDI,0) + (vm_offset_t)first_avail; 
virtual_end = (vm_offset_t)(VM_MAX_KERNEL_ADDRESS); @@ -759,25 +966,18 @@ pmap_bootstrap( * mapping of pages. */ #define SYSMAP(c, p, v, n) \ - v = (c)va; va += ((n)*INTEL_PGBYTES); p = pte; pte += (n); + v = (c)va; va += ((n)*INTEL_PGBYTES); p = pte; pte += (n) va = virtual_avail; - pte = (pt_entry_t *) pmap_pte(kernel_pmap, va); - - /* - * CMAP1/CMAP2 are used for zeroing and copying pages. - * CMAP3 is used for ml_phys_read/write. - */ - SYSMAP(caddr_t, CM1, CA1, 1) - * (pt_entry_t *) CM1 = 0; - SYSMAP(caddr_t, CM2, CA2, 1) - * (pt_entry_t *) CM2 = 0; - SYSMAP(caddr_t, CM3, CA3, 1) - * (pt_entry_t *) CM3 = 0; + pte = vtopte(va); - /* used by pmap_pte */ - SYSMAP(caddr_t, CM4, CA4, 1) - * (pt_entry_t *) CM4 = 0; + for (i=0; icpu_pmap->mapwindow[i].prv_CMAP), + (current_cpu_datap()->cpu_pmap->mapwindow[i].prv_CADDR), + 1); + *current_cpu_datap()->cpu_pmap->mapwindow[i].prv_CMAP = 0; + } /* DMAP user for debugger */ SYSMAP(caddr_t, DMAP1, DADDR1, 1); @@ -791,45 +991,109 @@ pmap_bootstrap( virtual_avail = va; wpkernel = 1; - if (PE_parse_boot_arg("debug", &boot_arg)) { - if (boot_arg & DB_PRT) wpkernel = 0; - if (boot_arg & DB_NMI) wpkernel = 0; + if (PE_parse_boot_arg("wpkernel", &boot_arg)) { + if (boot_arg == 0) + wpkernel = 0; } - /* remap kernel text readonly if not debugging or kprintfing */ + /* Remap kernel text readonly unless the "wpkernel" boot-arg is present + * and set to 0. 
+ */ if (wpkernel) { vm_offset_t myva; pt_entry_t *ptep; - for (myva = i386_round_page(VM_MIN_KERNEL_ADDRESS + MP_BOOT + MP_BOOTSTACK); myva < etext; myva += PAGE_SIZE) { + for (myva = i386_round_page(MP_BOOT + MP_BOOTSTACK); myva < etext; myva += PAGE_SIZE) { if (myva >= (vm_offset_t)sectHIBB && myva < ((vm_offset_t)sectHIBB + sectSizeHIB)) continue; - ptep = pmap_pte(kernel_pmap, myva); + ptep = pmap_pte(kernel_pmap, (vm_map_offset_t)myva); if (ptep) - *ptep &= ~INTEL_PTE_RW; + pmap_store_pte(ptep, *ptep & ~INTEL_PTE_RW); } - flush_tlb(); } + /* no matter what, kernel page zero is not accessible */ + pte = pmap_pte(kernel_pmap, 0); + pmap_store_pte(pte, INTEL_PTE_INVALID); + + /* map lowmem global page into fixed addr 0x2000 */ + if (0 == (pte = pmap_pte(kernel_pmap,0x2000))) panic("lowmem pte"); + + pmap_store_pte(pte, kvtophys((vm_offset_t)&lowGlo)|INTEL_PTE_VALID|INTEL_PTE_REF|INTEL_PTE_MOD|INTEL_PTE_WIRED|INTEL_PTE_RW); + flush_tlb(); + simple_lock_init(&kernel_pmap->lock, 0); simple_lock_init(&pv_free_list_lock, 0); - /* invalidate user virtual addresses */ - memset((char *)kernel_pmap->dirbase, - 0, - (KPTDI) * sizeof(pd_entry_t)); + pmap_init_high_shared(); + + pde_mapped_size = PDE_MAPPED_SIZE; + + if (cpu_64bit) { + pdpt_entry_t *ppdpt = (pdpt_entry_t *)IdlePDPT; + pdpt_entry_t *ppdpt64 = (pdpt_entry_t *)IdlePDPT64; + pdpt_entry_t *ppml4 = (pdpt_entry_t *)IdlePML4; + int istate = ml_set_interrupts_enabled(FALSE); + + /* + * Clone a new 64-bit 3rd-level page table directory, IdlePML4, + * with page bits set for the correct IA-32e operation and so that + * the legacy-mode IdlePDPT is retained for slave processor start-up. + * This is necessary due to the incompatible use of page bits between + * 64-bit and legacy modes. 
+ */ kernel_pmap->pm_cr3 = (pmap_paddr_t)((int)IdlePML4); /* setup in start.s for us */ + kernel_pmap->pm_pml4 = IdlePML4; + kernel_pmap->pm_pdpt = (pd_entry_t *) + ((unsigned int)IdlePDPT64 | KERNBASE ); +#define PAGE_BITS INTEL_PTE_VALID|INTEL_PTE_RW|INTEL_PTE_USER|INTEL_PTE_REF + pmap_store_pte(kernel_pmap->pm_pml4, + (uint32_t)IdlePDPT64 | PAGE_BITS); + pmap_store_pte((ppdpt64+0), *(ppdpt+0) | PAGE_BITS); + pmap_store_pte((ppdpt64+1), *(ppdpt+1) | PAGE_BITS); + pmap_store_pte((ppdpt64+2), *(ppdpt+2) | PAGE_BITS); + pmap_store_pte((ppdpt64+3), *(ppdpt+3) | PAGE_BITS); + + /* + * The kernel is also mapped in the uber-space at the 4GB starting + * 0xFFFFFF80:00000000. This is the highest entry in the 4th-level. + */ + pmap_store_pte((ppml4+KERNEL_UBER_PML4_INDEX), *(ppml4+0)); + + kernel64_cr3 = (addr64_t) kernel_pmap->pm_cr3; + cpu_IA32e_enable(current_cpu_datap()); + current_cpu_datap()->cpu_is64bit = TRUE; + /* welcome to a 64 bit world */ + + /* Re-initialize and load descriptors */ + cpu_desc_init64(&cpu_data_master, TRUE); + cpu_desc_load64(&cpu_data_master); + fast_syscall_init64(); + + pde_mapped_size = 512*4096 ; + + ml_set_interrupts_enabled(istate); + + } + kernel_pmap->pm_hold = kernel_pmap->pm_pml4; kprintf("Kernel virtual space from 0x%x to 0x%x.\n", VADDR(KPTDI,0), virtual_end); -#ifdef PAE - kprintf("Available physical space from 0x%llx to 0x%llx\n", - avail_start, avail_end); printf("PAE enabled\n"); -#else - kprintf("Available physical space from 0x%x to 0x%x\n", + if (cpu_64bit){ + printf("64 bit mode enabled\n");kprintf("64 bit mode enabled\n"); } + + kprintf("Available physical space from 0x%llx to 0x%llx\n", avail_start, avail_end); -#endif + + /* + * By default for 64-bit users loaded at 4GB, share kernel mapping. + * But this may be overridden by the -no_shared_cr3 boot-arg. 
+ */ + if (PE_parse_boot_arg("-no_shared_cr3", &no_shared_cr3)) { + kprintf("Shared kernel address space disabled\n"); + } } void @@ -852,8 +1116,8 @@ pmap_init(void) register long npages; vm_offset_t addr; register vm_size_t s; - vm_offset_t vaddr; - ppnum_t ppn; + vm_map_offset_t vaddr; + ppnum_t ppn; /* * Allocate memory for the pv_head_table and its lock bits, @@ -892,11 +1156,8 @@ pmap_init(void) pmap_zone = zinit(s, 400*s, 4096, "pmap"); /* XXX */ s = (vm_size_t) sizeof(struct pv_entry); pv_list_zone = zinit(s, 10000*s, 4096, "pv_list"); /* XXX */ -#ifdef PAE - // s = (vm_size_t) (sizeof(pdpt_entry_t) * NPGPTD); s = 63; pdpt_zone = zinit(s, 400*s, 4096, "pdpt"); /* XXX */ -#endif /* * Only now, when all of the data structures are allocated, @@ -909,23 +1170,22 @@ pmap_init(void) vm_first_phys = 0; vm_last_phys = avail_end; -#if GROW_KERNEL_FUNCTION_IMPLEMENTED kptobj = &kptobj_object_store; _vm_object_allocate((vm_object_size_t)NKPDE, kptobj); kernel_pmap->pm_obj = kptobj; -#endif /* create pv entries for kernel pages mapped by low level startup code. these have to exist so we can pmap_remove() e.g. 
kext pages from the middle of our addr space */ - vaddr = (vm_offset_t)VM_MIN_KERNEL_ADDRESS; + vaddr = (vm_map_offset_t)0; for (ppn = 0; ppn < i386_btop(avail_start) ; ppn++ ) { pv_entry_t pv_e; pv_e = pai_to_pvh(ppn); pv_e->va = vaddr; vaddr += PAGE_SIZE; + kernel_pmap->stats.resident_count++; pv_e->pmap = kernel_pmap; pv_e->next = PV_ENTRY_NULL; } @@ -938,10 +1198,6 @@ pmap_init(void) pmap_cache_list = PMAP_NULL; pmap_cache_count = 0; simple_lock_init(&pmap_cache_lock, 0); -#ifdef PMAP_QUEUE - simple_lock_init(&free_pmap_lock, 0); -#endif - } void @@ -952,7 +1208,7 @@ x86_lowmem_free(void) the actual pages that are released are determined by which pages the memory sizing code puts into the region table */ - ml_static_mfree((vm_offset_t) i386_ptob(pmap_memory_regions[0].base)|VM_MIN_KERNEL_ADDRESS, + ml_static_mfree((vm_offset_t) i386_ptob(pmap_memory_regions[0].base), (vm_size_t) i386_ptob(pmap_memory_regions[0].end - pmap_memory_regions[0].base)); } @@ -1002,15 +1258,21 @@ pmap_verify_free( */ pmap_t pmap_create( - vm_size_t size) + vm_map_size_t sz, + boolean_t is_64bit) { - register pmap_t p; -#ifdef PMAP_QUEUE - register pmap_t pro; - spl_t s; -#endif - register int i; - register vm_offset_t va; + register pmap_t p; + int i; + vm_offset_t va; + vm_size_t size; + pdpt_entry_t *pdpt; + pml4_entry_t *pml4p; + vm_page_t m; + int template; + pd_entry_t *pdp; + spl_t s; + + size = (vm_size_t) sz; /* * A software use-only map doesn't even need a map. 
@@ -1023,61 +1285,168 @@ pmap_create( p = (pmap_t) zalloc(pmap_zone); if (PMAP_NULL == p) panic("pmap_create zalloc"); - if (KERN_SUCCESS != kmem_alloc_wired(kernel_map, (vm_offset_t *)(&p->dirbase), NBPTD)) - panic("pmap_create kmem_alloc_wired"); -#ifdef PAE - p->pm_hold = (vm_offset_t)zalloc(pdpt_zone); - if ((vm_offset_t)NULL == p->pm_hold) { - panic("pdpt zalloc"); - } - p->pm_pdpt = (pdpt_entry_t *) (( p->pm_hold + 31) & ~31); - p->pm_ppdpt = kvtophys((vm_offset_t)p->pm_pdpt); /* XXX */ -#endif - if (NULL == (p->pm_obj = vm_object_allocate((vm_object_size_t)(NPGPTD*NPDEPG)))) - panic("pmap_create vm_object_allocate"); - memcpy(p->dirbase, - (void *)((unsigned int)IdlePTD | KERNBASE), - NBPTD); - va = (vm_offset_t)p->dirbase; - p->pdirbase = (pd_entry_t *)(kvtophys(va)); - simple_lock_init(&p->lock, 0); - - /* setup self referential mapping(s) */ - for (i = 0; i< NPGPTD; i++ ) { - pmap_paddr_t pa; - pa = (pmap_paddr_t) kvtophys(va + i386_ptob(i)); - * (pd_entry_t *) (p->dirbase + PTDPTDI + i) = - (pa & PG_FRAME) | INTEL_PTE_VALID | INTEL_PTE_RW | INTEL_PTE_REF | - INTEL_PTE_MOD | INTEL_PTE_WIRED ; -#ifdef PAE - p->pm_pdpt[i] = pa | INTEL_PTE_VALID; -#endif - } - p->cpus_using = 0; + /* init counts now since we'll be bumping some */ + simple_lock_init(&p->lock, 0); p->stats.resident_count = 0; p->stats.wired_count = 0; p->ref_count = 1; + p->nx_enabled = 1; + p->pm_64bit = is_64bit; + p->pm_kernel_cr3 = FALSE; + + if (!cpu_64bit) { + /* legacy 32 bit setup */ + /* in the legacy case the pdpt layer is hardwired to 4 entries and each + * entry covers 1GB of addr space */ + if (KERN_SUCCESS != kmem_alloc_wired(kernel_map, (vm_offset_t *)(&p->dirbase), NBPTD)) + panic("pmap_create kmem_alloc_wired"); + p->pm_hold = (vm_offset_t)zalloc(pdpt_zone); + if ((vm_offset_t)NULL == p->pm_hold) { + panic("pdpt zalloc"); + } + pdpt = (pdpt_entry_t *) (( p->pm_hold + 31) & ~31); + p->pm_cr3 = (pmap_paddr_t)kvtophys((vm_offset_t)pdpt); + if (NULL == (p->pm_obj = 
vm_object_allocate((vm_object_size_t)(NPGPTD*NPTDPG)))) + panic("pmap_create vm_object_allocate"); + + memset((char *)p->dirbase, 0, NBPTD); + + va = (vm_offset_t)p->dirbase; + p->pdirbase = kvtophys(va); + + template = cpu_64bit ? INTEL_PTE_VALID|INTEL_PTE_RW|INTEL_PTE_USER|INTEL_PTE_REF : INTEL_PTE_VALID; + for (i = 0; i< NPGPTD; i++, pdpt++) { + pmap_paddr_t pa; + pa = (pmap_paddr_t) kvtophys(va + i386_ptob(i)); + pmap_store_pte(pdpt, pa | template); + } + + /* map the high shared pde */ + pmap_store_pte(pmap_pde(p, HIGH_MEM_BASE), high_shared_pde); -#ifdef PMAP_QUEUE - /* insert new pmap at head of queue hanging off kernel_pmap */ - SPLVM(s); - simple_lock(&free_pmap_lock); - p->pmap_link.next = (queue_t)kernel_pmap->pmap_link.next; - kernel_pmap->pmap_link.next = (queue_t)p; + } else { - pro = (pmap_t) p->pmap_link.next; - p->pmap_link.prev = (queue_t)pro->pmap_link.prev; - pro->pmap_link.prev = (queue_t)p; + /* 64 bit setup */ - - simple_unlock(&free_pmap_lock); - SPLX(s); -#endif + /* alloc the pml4 page in kernel vm */ + if (KERN_SUCCESS != kmem_alloc_wired(kernel_map, (vm_offset_t *)(&p->pm_hold), PAGE_SIZE)) + panic("pmap_create kmem_alloc_wired pml4"); + + memset((char *)p->pm_hold, 0, PAGE_SIZE); + p->pm_cr3 = (pmap_paddr_t)kvtophys((vm_offset_t)p->pm_hold); + + inuse_ptepages_count++; + p->stats.resident_count++; + p->stats.wired_count++; + + /* allocate the vm_objs to hold the pdpt, pde and pte pages */ + + if (NULL == (p->pm_obj_pml4 = vm_object_allocate((vm_object_size_t)(NPML4PGS)))) + panic("pmap_create pdpt obj"); + + if (NULL == (p->pm_obj_pdpt = vm_object_allocate((vm_object_size_t)(NPDPTPGS)))) + panic("pmap_create pdpt obj"); + + if (NULL == (p->pm_obj = vm_object_allocate((vm_object_size_t)(NPDEPGS)))) + panic("pmap_create pte obj"); + + /* uber space points to uber mapped kernel */ + s = splhigh(); + pml4p = pmap64_pml4(p, 0ULL); + pmap_store_pte((pml4p+KERNEL_UBER_PML4_INDEX),*kernel_pmap->pm_pml4); + if (!is_64bit) { + while ((pdp = 
pmap64_pde(p, (uint64_t)HIGH_MEM_BASE)) == PD_ENTRY_NULL) { + splx(s); + pmap_expand_pdpt(p, (uint64_t)HIGH_MEM_BASE); /* need room for another pde entry */ + s = splhigh(); + } + pmap_store_pte(pdp, high_shared_pde); + } + + splx(s); + } return(p); } +void +pmap_set_4GB_pagezero(pmap_t p) +{ + int spl; + pdpt_entry_t *user_pdptp; + pdpt_entry_t *kern_pdptp; + + assert(p->pm_64bit); + + /* Kernel-shared cr3 may be disabled by boot arg. */ + if (no_shared_cr3) + return; + + /* + * Set the bottom 4 3rd-level pte's to be the kernel's. + */ + spl = splhigh(); + while ((user_pdptp = pmap64_pdpt(p, 0x0)) == PDPT_ENTRY_NULL) { + splx(spl); + pmap_expand_pml4(p, 0x0); + spl = splhigh(); + } + kern_pdptp = kernel_pmap->pm_pdpt; + pmap_store_pte(user_pdptp+0, *(kern_pdptp+0)); + pmap_store_pte(user_pdptp+1, *(kern_pdptp+1)); + pmap_store_pte(user_pdptp+2, *(kern_pdptp+2)); + pmap_store_pte(user_pdptp+3, *(kern_pdptp+3)); + + p->pm_kernel_cr3 = TRUE; + + splx(spl); + +} + +void +pmap_load_kernel_cr3(void) +{ + uint32_t kernel_cr3; + + assert(!ml_get_interrupts_enabled()); + + /* + * Reload cr3 with the true kernel cr3. + * Note: kernel's pml4 resides below 4GB physical. + */ + kernel_cr3 = current_cpu_datap()->cpu_kernel_cr3; + set_cr3(kernel_cr3); + current_cpu_datap()->cpu_active_cr3 = kernel_cr3; + current_cpu_datap()->cpu_task_map = TASK_MAP_32BIT; + current_cpu_datap()->cpu_tlb_invalid = FALSE; + __asm__ volatile("mfence"); +} + +void +pmap_clear_4GB_pagezero(pmap_t p) +{ + int spl; + pdpt_entry_t *user_pdptp; + uint32_t cr3; + + if (!p->pm_kernel_cr3) + return; + + spl = splhigh(); + user_pdptp = pmap64_pdpt(p, 0x0); + pmap_store_pte(user_pdptp+0, 0); + pmap_store_pte(user_pdptp+1, 0); + pmap_store_pte(user_pdptp+2, 0); + pmap_store_pte(user_pdptp+3, 0); + + p->pm_kernel_cr3 = FALSE; + + pmap_load_kernel_cr3(); + + splx(spl); +} + /* * Retire the given physical map from service. 
* Should only be called if the map contains @@ -1088,47 +1457,29 @@ void pmap_destroy( register pmap_t p) { - register pt_entry_t *pdep; register int c; spl_t s; +#if 0 + register pt_entry_t *pdep; register vm_page_t m; -#ifdef PMAP_QUEUE - register pmap_t pre,pro; #endif if (p == PMAP_NULL) return; - SPLVM(s); simple_lock(&p->lock); c = --p->ref_count; if (c == 0) { - register int my_cpu; - - mp_disable_preemption(); - my_cpu = cpu_number(); - /* * If some cpu is not using the physical pmap pointer that it * is supposed to be (see set_dirbase), we might be using the * pmap that is being destroyed! Make sure we are * physically on the right pmap: */ - /* force pmap/cr3 update */ PMAP_UPDATE_TLBS(p, VM_MIN_ADDRESS, VM_MAX_KERNEL_ADDRESS); - if (PMAP_REAL(my_cpu) == p) { - PMAP_CPU_CLR(p, my_cpu); - PMAP_REAL(my_cpu) = kernel_pmap; -#ifdef PAE - set_cr3((unsigned int)kernel_pmap->pm_ppdpt); -#else - set_cr3((unsigned int)kernel_pmap->pdirbase); -#endif - } - mp_enable_preemption(); } simple_unlock(&p->lock); SPLX(s); @@ -1137,31 +1488,21 @@ pmap_destroy( return; /* still in use */ } -#ifdef PMAP_QUEUE - /* remove from pmap queue */ - SPLVM(s); - simple_lock(&free_pmap_lock); - - pre = (pmap_t)p->pmap_link.prev; - pre->pmap_link.next = (queue_t)p->pmap_link.next; - pro = (pmap_t)p->pmap_link.next; - pro->pmap_link.prev = (queue_t)p->pmap_link.prev; - - simple_unlock(&free_pmap_lock); - SPLX(s); -#endif - /* * Free the memory maps, then the * pmap structure. 
*/ + if (!cpu_64bit) { +#if 0 pdep = (pt_entry_t *)p->dirbase; while (pdep < (pt_entry_t *)&p->dirbase[(UMAXPTDI+1)]) { - int ind; + int ind; + if (*pdep & INTEL_PTE_VALID) { - ind = pdep - (pt_entry_t *)&p->dirbase[0]; + ind = pdep - (pt_entry_t *)&p->dirbase[0]; + vm_object_lock(p->pm_obj); m = vm_page_lookup(p->pm_obj, (vm_object_offset_t)ind); if (m == VM_PAGE_NULL) { @@ -1170,25 +1511,49 @@ pmap_destroy( vm_page_lock_queues(); vm_page_free(m); inuse_ptepages_count--; + vm_object_unlock(p->pm_obj); vm_page_unlock_queues(); /* * Clear pdes, this might be headed for the cache. */ - *pdep++ = 0; + pmap_store_pte(pdep, 0); + pdep++; } else { - *pdep++ = 0; + pmap_store_pte(pdep, 0); + pdep++; } } - - vm_object_deallocate(p->pm_obj); - kmem_free(kernel_map, (vm_offset_t)p->dirbase, NBPTD); -#ifdef PAE - zfree(pdpt_zone, (void *)p->pm_hold); +#else + inuse_ptepages_count -= p->pm_obj->resident_page_count; #endif + vm_object_deallocate(p->pm_obj); + kmem_free(kernel_map, (vm_offset_t)p->dirbase, NBPTD); + zfree(pdpt_zone, (void *)p->pm_hold); + } else { + + /* 64 bit */ + + pmap_unmap_sharedpage(p); + + /* free 64 bit mode structs */ + inuse_ptepages_count--; + kmem_free(kernel_map, (vm_offset_t)p->pm_hold, PAGE_SIZE); + + inuse_ptepages_count -= p->pm_obj_pml4->resident_page_count; + vm_object_deallocate(p->pm_obj_pml4); + + inuse_ptepages_count -= p->pm_obj_pdpt->resident_page_count; + vm_object_deallocate(p->pm_obj_pdpt); + + inuse_ptepages_count -= p->pm_obj->resident_page_count; + vm_object_deallocate(p->pm_obj); + + } + zfree(pmap_zone, p); } @@ -1223,11 +1588,10 @@ pmap_reference( * Assumes that the pte-page exists. 
*/ -/* static */ -void +static void pmap_remove_range( pmap_t pmap, - vm_offset_t va, + vm_map_offset_t vaddr, pt_entry_t *spte, pt_entry_t *epte) { @@ -1236,21 +1600,16 @@ pmap_remove_range( int pai; pmap_paddr_t pa; -#if DEBUG_PTE_PAGE - if (pmap != kernel_pmap) - ptep_check(get_pte_page(spte)); -#endif /* DEBUG_PTE_PAGE */ num_removed = 0; num_unwired = 0; for (cpte = spte; cpte < epte; - cpte++, va += PAGE_SIZE) { + cpte++, vaddr += PAGE_SIZE) { pa = pte_to_pa(*cpte); if (pa == 0) continue; - num_removed++; if (iswired(*cpte)) num_unwired++; @@ -1262,9 +1621,10 @@ pmap_remove_range( */ register pt_entry_t *lpte = cpte; - *lpte = 0; + pmap_store_pte(lpte, 0); continue; } + num_removed++; pai = pa_index(pa); LOCK_PVH(pai); @@ -1276,9 +1636,9 @@ pmap_remove_range( register pt_entry_t *lpte; lpte = cpte; - pmap_phys_attributes[pai] |= + pmap_phys_attributes[pai] |= *lpte & (PHYS_MODIFIED|PHYS_REFERENCED); - *lpte = 0; + pmap_store_pte(lpte, 0); } @@ -1293,7 +1653,7 @@ pmap_remove_range( if (pv_h->pmap == PMAP_NULL) { panic("pmap_remove: null pv_list!"); } - if (pv_h->va == va && pv_h->pmap == pmap) { + if (pv_h->va == vaddr && pv_h->pmap == pmap) { /* * Header is the pv_entry. 
Copy the next one * to header and free the next one (we cannot @@ -1315,7 +1675,7 @@ pmap_remove_range( if ((cur = prev->next) == PV_ENTRY_NULL) { panic("pmap-remove: mapping not in pv_list!"); } - } while (cur->va != va || cur->pmap != pmap); + } while (cur->va != vaddr || cur->pmap != pmap); prev->next = cur->next; PV_FREE(cur); } @@ -1364,39 +1724,31 @@ pmap_remove( spl_t spl; register pt_entry_t *pde; register pt_entry_t *spte, *epte; - vm_offset_t l; - vm_offset_t s, e; - vm_offset_t orig_s; + addr64_t l64; + addr64_t orig_s64; - if (map == PMAP_NULL) + if (map == PMAP_NULL || s64 == e64) return; PMAP_READ_LOCK(map, spl); - if (value_64bit(s64) || value_64bit(e64)) { - panic("pmap_remove addr overflow"); - } - - orig_s = s = (vm_offset_t)low32(s64); - e = (vm_offset_t)low32(e64); - - pde = pmap_pde(map, s); - - while (s < e) { - l = (s + PDE_MAPPED_SIZE) & ~(PDE_MAPPED_SIZE-1); - if (l > e) - l = e; - if (*pde & INTEL_PTE_VALID) { - spte = (pt_entry_t *)pmap_pte(map, (s & ~(PDE_MAPPED_SIZE-1))); - spte = &spte[ptenum(s)]; - epte = &spte[intel_btop(l-s)]; - pmap_remove_range(map, s, spte, epte); + orig_s64 = s64; + + while (s64 < e64) { + l64 = (s64 + pde_mapped_size) & ~(pde_mapped_size-1); + if (l64 > e64) + l64 = e64; + pde = pmap_pde(map, s64); + if (pde && (*pde & INTEL_PTE_VALID)) { + spte = (pt_entry_t *)pmap_pte(map, (s64 & ~(pde_mapped_size-1))); + spte = &spte[ptenum(s64)]; + epte = &spte[intel_btop(l64-s64)]; + pmap_remove_range(map, s64, spte, epte); } - s = l; + s64 = l64; pde++; } - - PMAP_UPDATE_TLBS(map, orig_s, e); + PMAP_UPDATE_TLBS(map, orig_s64, e64); PMAP_READ_UNLOCK(map, spl); } @@ -1420,10 +1772,10 @@ pmap_page_protect( register pmap_t pmap; spl_t spl; boolean_t remove; - pmap_paddr_t phys; + pmap_paddr_t phys; assert(pn != vm_page_fictitious_addr); - phys = (pmap_paddr_t)i386_ptob(pn); + if (!valid_page(pn)) { /* * Not a managed page. 
@@ -1445,17 +1797,17 @@ pmap_page_protect( remove = TRUE; break; } + phys = (pmap_paddr_t)i386_ptob(pn); + pai = pa_index(phys); + pv_h = pai_to_pvh(pai); + /* * Lock the pmap system first, since we will be changing * several pmaps. */ - PMAP_WRITE_LOCK(spl); - pai = pa_index(phys); - pv_h = pai_to_pvh(pai); - /* * Walk down PV list, changing or removing all mappings. * We do not have to lock the pv_list because we have @@ -1463,93 +1815,95 @@ pmap_page_protect( */ if (pv_h->pmap != PMAP_NULL) { - prev = pv_e = pv_h; - do { - register vm_offset_t va; - pmap = pv_e->pmap; - /* - * Lock the pmap to block pmap_extract and similar routines. - */ - simple_lock(&pmap->lock); - - { - - va = pv_e->va; - pte = pmap_pte(pmap, va); - - /* - * Consistency checks. - */ - /* assert(*pte & INTEL_PTE_VALID); XXX */ - /* assert(pte_to_phys(*pte) == phys); */ - - } + prev = pv_e = pv_h; - /* - * Remove the mapping if new protection is NONE - * or if write-protecting a kernel mapping. - */ - if (remove || pmap == kernel_pmap) { - /* - * Remove the mapping, collecting any modify bits. - */ - { - pmap_phys_attributes[pai] |= - *pte & (PHYS_MODIFIED|PHYS_REFERENCED); - *pte++ = 0; - PMAP_UPDATE_TLBS(pmap, va, va + PAGE_SIZE); - } + do { + register vm_map_offset_t vaddr; - assert(pmap->stats.resident_count >= 1); - pmap->stats.resident_count--; + pmap = pv_e->pmap; + /* + * Lock the pmap to block pmap_extract and similar routines. + */ + simple_lock(&pmap->lock); - /* - * Remove the pv_entry. - */ - if (pv_e == pv_h) { + vaddr = pv_e->va; + pte = pmap_pte(pmap, vaddr); + if(0 == pte) { + kprintf("pmap_page_protect pmap 0x%x pn 0x%x vaddr 0x%llx\n",pmap, pn, vaddr); + panic("pmap_page_protect"); + } /* - * Fix up head later. + * Consistency checks. */ - pv_h->pmap = PMAP_NULL; - } - else { + /* assert(*pte & INTEL_PTE_VALID); XXX */ + /* assert(pte_to_phys(*pte) == phys); */ + + /* - * Delete this entry. 
+ * Remove the mapping if new protection is NONE + * or if write-protecting a kernel mapping. */ - prev->next = pv_e->next; - PV_FREE(pv_e); - } - } - else { - /* - * Write-protect. - */ + if (remove || pmap == kernel_pmap) { + /* + * Remove the mapping, collecting any modify bits. + */ + pmap_store_pte(pte, *pte & ~INTEL_PTE_VALID); - *pte &= ~INTEL_PTE_WRITE; - pte++; - PMAP_UPDATE_TLBS(pmap, va, va + PAGE_SIZE); - /* - * Advance prev. - */ - prev = pv_e; - } + PMAP_UPDATE_TLBS(pmap, vaddr, vaddr + PAGE_SIZE); - simple_unlock(&pmap->lock); + pmap_phys_attributes[pai] |= *pte & (PHYS_MODIFIED|PHYS_REFERENCED); - } while ((pv_e = prev->next) != PV_ENTRY_NULL); + pmap_store_pte(pte, 0); - /* - * If pv_head mapping was removed, fix it up. - */ - if (pv_h->pmap == PMAP_NULL) { - pv_e = pv_h->next; - if (pv_e != PV_ENTRY_NULL) { - *pv_h = *pv_e; - PV_FREE(pv_e); + + //XXX breaks DEBUG build assert(pmap->stats.resident_count >= 1); + pmap->stats.resident_count--; + + /* + * Remove the pv_entry. + */ + if (pv_e == pv_h) { + /* + * Fix up head later. + */ + pv_h->pmap = PMAP_NULL; + } + else { + /* + * Delete this entry. + */ + prev->next = pv_e->next; + PV_FREE(pv_e); + } + } else { + /* + * Write-protect. + */ + pmap_store_pte(pte, *pte & ~INTEL_PTE_WRITE); + + PMAP_UPDATE_TLBS(pmap, vaddr, vaddr + PAGE_SIZE); + /* + * Advance prev. + */ + prev = pv_e; + } + + simple_unlock(&pmap->lock); + + } while ((pv_e = prev->next) != PV_ENTRY_NULL); + + /* + * If pv_head mapping was removed, fix it up. 
+ */ + if (pv_h->pmap == PMAP_NULL) { + pv_e = pv_h->next; + + if (pv_e != PV_ENTRY_NULL) { + *pv_h = *pv_e; + PV_FREE(pv_e); + } } - } } - PMAP_WRITE_UNLOCK(spl); } @@ -1577,64 +1931,89 @@ unsigned int pmap_disconnect( void pmap_protect( pmap_t map, - vm_offset_t s, - vm_offset_t e, + vm_map_offset_t sva, + vm_map_offset_t eva, vm_prot_t prot) { register pt_entry_t *pde; register pt_entry_t *spte, *epte; - vm_offset_t l; + vm_map_offset_t lva; + vm_map_offset_t orig_sva; spl_t spl; - vm_offset_t orig_s = s; - + boolean_t set_NX; if (map == PMAP_NULL) return; - /* - * Determine the new protection. - */ - switch (prot) { - case VM_PROT_READ: - case VM_PROT_READ|VM_PROT_EXECUTE: - break; - case VM_PROT_READ|VM_PROT_WRITE: - case VM_PROT_ALL: - return; /* nothing to do */ - default: - pmap_remove(map, (addr64_t)s, (addr64_t)e); + if (prot == VM_PROT_NONE) { + pmap_remove(map, sva, eva); return; } + if ( (prot & VM_PROT_EXECUTE) || !nx_enabled || !map->nx_enabled ) + set_NX = FALSE; + else + set_NX = TRUE; + SPLVM(spl); simple_lock(&map->lock); - pde = pmap_pde(map, s); - while (s < e) { - l = (s + PDE_MAPPED_SIZE) & ~(PDE_MAPPED_SIZE-1); - if (l > e) - l = e; - if (*pde & INTEL_PTE_VALID) { - spte = (pt_entry_t *)pmap_pte(map, (s & ~(PDE_MAPPED_SIZE-1))); - spte = &spte[ptenum(s)]; - epte = &spte[intel_btop(l-s)]; + orig_sva = sva; + while (sva < eva) { + lva = (sva + pde_mapped_size) & ~(pde_mapped_size-1); + if (lva > eva) + lva = eva; + pde = pmap_pde(map, sva); + if (pde && (*pde & INTEL_PTE_VALID)) { + spte = (pt_entry_t *)pmap_pte(map, (sva & ~(pde_mapped_size-1))); + spte = &spte[ptenum(sva)]; + epte = &spte[intel_btop(lva-sva)]; while (spte < epte) { - if (*spte & INTEL_PTE_VALID) - *spte &= ~INTEL_PTE_WRITE; + if (*spte & INTEL_PTE_VALID) { + + if (prot & VM_PROT_WRITE) + pmap_store_pte(spte, *spte | INTEL_PTE_WRITE); + else + pmap_store_pte(spte, *spte & ~INTEL_PTE_WRITE); + + if (set_NX == TRUE) + pmap_store_pte(spte, *spte | INTEL_PTE_NX); + else + 
pmap_store_pte(spte, *spte & ~INTEL_PTE_NX); + + } spte++; } } - s = l; + sva = lva; pde++; } - - PMAP_UPDATE_TLBS(map, orig_s, e); + PMAP_UPDATE_TLBS(map, orig_sva, eva); simple_unlock(&map->lock); SPLX(spl); } +/* Map a (possibly) autogenned block */ +void +pmap_map_block( + pmap_t pmap, + addr64_t va, + ppnum_t pa, + uint32_t size, + vm_prot_t prot, + int attr, + __unused unsigned int flags) +{ + uint32_t page; + + for (page = 0; page < size; page++) { + pmap_enter(pmap, va, pa, prot, attr, TRUE); + va += PAGE_SIZE; + pa++; + } +} /* @@ -1652,7 +2031,7 @@ pmap_protect( void pmap_enter( register pmap_t pmap, - vm_offset_t v, + vm_map_offset_t vaddr, ppnum_t pn, vm_prot_t prot, unsigned int flags, @@ -1665,19 +2044,26 @@ pmap_enter( pt_entry_t template; spl_t spl; pmap_paddr_t old_pa; - pmap_paddr_t pa = (pmap_paddr_t)i386_ptob(pn); + pmap_paddr_t pa = (pmap_paddr_t)i386_ptob(pn); + boolean_t need_tlbflush = FALSE; + boolean_t set_NX; - XPR(0x80000000, "%x/%x: pmap_enter %x/%x/%x\n", + XPR(0x80000000, "%x/%x: pmap_enter %x/%qx/%x\n", current_thread(), current_thread(), - pmap, v, pn); + pmap, vaddr, pn); assert(pn != vm_page_fictitious_addr); if (pmap_debug) - printf("pmap(%x, %x)\n", v, pn); + printf("pmap(%qx, %x)\n", vaddr, pn); if (pmap == PMAP_NULL) return; + if ( (prot & VM_PROT_EXECUTE) || !nx_enabled || !pmap->nx_enabled ) + set_NX = FALSE; + else + set_NX = TRUE; + /* * Must allocate a new pvlist entry while we're unlocked; * zalloc may cause pageout (which will lock the pmap system). @@ -1695,13 +2081,13 @@ pmap_enter( * pages to map one VM page. */ - while ((pte = pmap_pte(pmap, v)) == PT_ENTRY_NULL) { + while ((pte = pmap_pte(pmap, vaddr)) == PT_ENTRY_NULL) { /* * Must unlock to expand the pmap. 
*/ PMAP_READ_UNLOCK(pmap, spl); - pmap_expand(pmap, v); + pmap_expand(pmap, vaddr); /* going to grow pde level page(s) */ PMAP_READ_LOCK(pmap, spl); } @@ -1717,7 +2103,7 @@ pmap_enter( template = pa_to_pte(pa) | INTEL_PTE_VALID; - if(flags & VM_MEM_NOT_CACHEABLE) { + if(VM_MEM_NOT_CACHEABLE == (flags & (VM_MEM_NOT_CACHEABLE | VM_WIMG_USE_DEFAULT))) { if(!(flags & VM_MEM_GUARDED)) template |= INTEL_PTE_PTA; template |= INTEL_PTE_NCACHE; @@ -1727,6 +2113,10 @@ pmap_enter( template |= INTEL_PTE_USER; if (prot & VM_PROT_WRITE) template |= INTEL_PTE_WRITE; + + if (set_NX == TRUE) + template |= INTEL_PTE_NX; + if (wired) { template |= INTEL_PTE_WIRED; if (!iswired(*pte)) @@ -1741,9 +2131,11 @@ pmap_enter( if (*pte & INTEL_PTE_MOD) template |= INTEL_PTE_MOD; - WRITE_PTE(pte, template) - pte++; + pmap_store_pte(pte, template); + pte++; + + need_tlbflush = TRUE; goto Done; } @@ -1754,6 +2146,7 @@ pmap_enter( * 2) Add pvlist entry for new mapping * 3) Enter new mapping. * + * SHARING FAULTS IS HORRIBLY BROKEN * SHARING_FAULTS complicates this slightly in that it cannot * replace the mapping, but must remove it (because adding the * pvlist entry for the new mapping may remove others), and @@ -1767,12 +2160,6 @@ pmap_enter( if (old_pa != (pmap_paddr_t) 0) { - -#if DEBUG_PTE_PAGE - if (pmap != kernel_pmap) - ptep_check(get_pte_page(pte)); -#endif /* DEBUG_PTE_PAGE */ - /* * Don't do anything to pages outside valid memory here. * Instead convince the code that enters a new mapping @@ -1793,8 +2180,8 @@ pmap_enter( pmap_phys_attributes[pai] |= *pte & (PHYS_MODIFIED|PHYS_REFERENCED); - WRITE_PTE(pte, 0) + pmap_store_pte(pte, 0); /* * Remove the mapping from the pvlist for * this physical page. @@ -1806,7 +2193,7 @@ pmap_enter( if (pv_h->pmap == PMAP_NULL) { panic("pmap_enter: null pv_list!"); } - if (pv_h->va == v && pv_h->pmap == pmap) { + if (pv_h->va == vaddr && pv_h->pmap == pmap) { /* * Header is the pv_entry. 
Copy the next one * to header and free the next one (we cannot @@ -1828,7 +2215,7 @@ pmap_enter( if ((cur = prev->next) == PV_ENTRY_NULL) { panic("pmap_enter: mapping not in pv_list!"); } - } while (cur->va != v || cur->pmap != pmap); + } while (cur->va != vaddr || cur->pmap != pmap); prev->next = cur->next; pv_e = cur; } @@ -1843,13 +2230,13 @@ pmap_enter( * one). Do removal part of accounting. */ old_pa = (pmap_paddr_t) 0; - assert(pmap->stats.resident_count >= 1); - pmap->stats.resident_count--; + if (iswired(*pte)) { assert(pmap->stats.wired_count >= 1); pmap->stats.wired_count--; } } + need_tlbflush = TRUE; } @@ -1863,7 +2250,7 @@ pmap_enter( pai = pa_index(pa); -#if SHARING_FAULTS +#if SHARING_FAULTS /* this is horribly broken , do not enable */ RetryPvList: /* * We can return here from the sharing fault code below @@ -1878,7 +2265,7 @@ pmap_enter( /* * No mappings yet */ - pv_h->va = v; + pv_h->va = vaddr; pv_h->pmap = pmap; pv_h->next = PV_ENTRY_NULL; } @@ -1891,13 +2278,13 @@ pmap_enter( */ pv_entry_t e = pv_h; while (e != PV_ENTRY_NULL) { - if (e->pmap == pmap && e->va == v) + if (e->pmap == pmap && e->va == vaddr) panic("pmap_enter: already in pv_list"); e = e->next; } } #endif /* DEBUG */ -#if SHARING_FAULTS +#if SHARING_FAULTS /* broken, do not enable */ { /* * do sharing faults. 
@@ -1923,6 +2310,7 @@ pmap_enter( */ pmap_remove_range(pmap, e->va, opte, opte + 1); + PMAP_UPDATE_TLBS(pmap, e->va, e->va + PAGE_SIZE); /* @@ -1976,7 +2364,7 @@ pmap_enter( if (logit) { pma = &pmap_aliasbuf[pmap_alias_index]; pma->pmap = pmap; - pma->va = v; + pma->va = vaddr; pma->rpc = rpc; pma->cookie = PMAP_ALIAS_COOKIE; if (++pmap_alias_index >= PMAP_ALIAS_MAX) @@ -1996,7 +2384,7 @@ pmap_enter( panic("pmap no pv_e's"); } } - pv_e->va = v; + pv_e->va = vaddr; pv_e->pmap = pmap; pv_e->next = pv_h->next; pv_h->next = pv_e; @@ -2006,13 +2394,18 @@ pmap_enter( pv_e = PV_ENTRY_NULL; } UNLOCK_PVH(pai); + + /* + * only count the mapping + * for 'managed memory' + */ + pmap->stats.resident_count++; } /* - * Step 3) Enter and count the mapping. + * Step 3) Enter the mapping. */ - pmap->stats.resident_count++; /* * Build a template to speed up entering - @@ -2030,18 +2423,22 @@ pmap_enter( template |= INTEL_PTE_USER; if (prot & VM_PROT_WRITE) template |= INTEL_PTE_WRITE; + + if (set_NX == TRUE) + template |= INTEL_PTE_NX; + if (wired) { template |= INTEL_PTE_WIRED; pmap->stats.wired_count++; } - - WRITE_PTE(pte, template) + pmap_store_pte(pte, template); Done: - PMAP_UPDATE_TLBS(pmap, v, v + PAGE_SIZE); + if (need_tlbflush == TRUE) + PMAP_UPDATE_TLBS(pmap, vaddr, vaddr + PAGE_SIZE); if (pv_e != PV_ENTRY_NULL) { - PV_FREE(pv_e); + PV_FREE(pv_e); } PMAP_READ_UNLOCK(pmap, spl); @@ -2057,7 +2454,7 @@ pmap_enter( void pmap_change_wiring( register pmap_t map, - vm_offset_t v, + vm_map_offset_t vaddr, boolean_t wired) { register pt_entry_t *pte; @@ -2070,7 +2467,7 @@ pmap_change_wiring( */ PMAP_READ_LOCK(map, spl); - if ((pte = pmap_pte(map, v)) == PT_ENTRY_NULL) + if ((pte = pmap_pte(map, vaddr)) == PT_ENTRY_NULL) panic("pmap_change_wiring: pte missing"); if (wired && !iswired(*pte)) { @@ -2078,7 +2475,8 @@ pmap_change_wiring( * wiring down mapping */ map->stats.wired_count++; - *pte++ |= INTEL_PTE_WIRED; + pmap_store_pte(pte, *pte | INTEL_PTE_WIRED); + pte++; } else if 
(!wired && iswired(*pte)) { /* @@ -2086,7 +2484,8 @@ pmap_change_wiring( */ assert(map->stats.wired_count >= 1); map->stats.wired_count--; - *pte++ &= ~INTEL_PTE_WIRED; + pmap_store_pte(pte, *pte & ~INTEL_PTE_WIRED); + pte++; } PMAP_READ_UNLOCK(map, spl); @@ -2101,18 +2500,18 @@ ppnum_t pmap_find_phys(pmap_t pmap, addr64_t va) { pt_entry_t *ptp; - vm_offset_t a32; ppnum_t ppn; - if (value_64bit(va)) - panic("pmap_find_phys 64 bit value"); - a32 = (vm_offset_t) low32(va); - ptp = pmap_pte(pmap, a32); + mp_disable_preemption(); + + ptp = pmap_pte(pmap, va); if (PT_ENTRY_NULL == ptp) { ppn = 0; } else { ppn = (ppnum_t) i386_btop(pte_to_pa(*ptp)); } + mp_enable_preemption(); + return ppn; } @@ -2129,54 +2528,40 @@ pmap_find_phys(pmap_t pmap, addr64_t va) vm_offset_t pmap_extract( register pmap_t pmap, - vm_offset_t va) + vm_map_offset_t vaddr) { - ppnum_t ppn; - vm_offset_t vaddr; + ppnum_t ppn; + vm_offset_t paddr; - vaddr = (vm_offset_t)0; - ppn = pmap_find_phys(pmap, (addr64_t)va); - if (ppn) { - vaddr = ((vm_offset_t)i386_ptob(ppn)) | (va & INTEL_OFFMASK); - } - return (vaddr); + paddr = (vm_offset_t)0; + ppn = pmap_find_phys(pmap, vaddr); + if (ppn) { + paddr = ((vm_offset_t)i386_ptob(ppn)) | (vaddr & INTEL_OFFMASK); + } + return (paddr); } - -/* - * Routine: pmap_expand - * - * Expands a pmap to be able to map the specified virtual address. - * - * Allocates new virtual memory for the P0 or P1 portion of the - * pmap, then re-maps the physical pages that were in the old - * pmap to be in the new pmap. - * - * Must be called with the pmap system and the pmap unlocked, - * since these must be unlocked to use vm_allocate or vm_deallocate. - * Thus it must be called in a loop that checks whether the map - * has been expanded enough. - * (We won't loop forever, since page tables aren't shrunk.) 
- */ void -pmap_expand( - register pmap_t map, - register vm_offset_t v) +pmap_expand_pml4( + pmap_t map, + vm_map_offset_t vaddr) { - pt_entry_t *pdp; register vm_page_t m; register pmap_paddr_t pa; - register int i; + uint64_t i; spl_t spl; ppnum_t pn; + pml4_entry_t *pml4p; - if (map == kernel_pmap) { - pmap_growkernel(v); - return; - } + if (kernel_pmap == map) panic("expand kernel pml4"); + + spl = splhigh(); + pml4p = pmap64_pml4(map, vaddr); + splx(spl); + if (PML4_ENTRY_NULL == pml4p) panic("pmap_expand_pml4 no pml4p"); /* - * Allocate a VM page for the level 2 page table entries. + * Allocate a VM page for the pml4 page */ while ((m = vm_page_grab()) == VM_PAGE_NULL) VM_PAGE_WAIT(); @@ -2187,14 +2572,25 @@ pmap_expand( */ pn = m->phys_page; pa = i386_ptob(pn); - i = pdenum(map, v); - vm_object_lock(map->pm_obj); - vm_page_insert(m, map->pm_obj, (vm_object_offset_t)i); + i = pml4idx(map, vaddr); + + vm_object_lock(map->pm_obj_pml4); +#if 0 /* DEBUG */ + if (0 != vm_page_lookup(map->pm_obj_pml4, (vm_object_offset_t)i)) { + kprintf("pmap_expand_pml4: obj_pml4 not empty, pmap 0x%x pm_obj_pml4 0x%x vaddr 0x%llx i 0x%llx\n", + map, map->pm_obj_pml4, vaddr, i); + } +#endif + vm_page_insert(m, map->pm_obj_pml4, (vm_object_offset_t)i); + vm_page_lock_queues(); vm_page_wire(m); - inuse_ptepages_count++; - vm_object_unlock(map->pm_obj); + vm_page_unlock_queues(); + vm_object_unlock(map->pm_obj_pml4); + inuse_ptepages_count++; + map->stats.resident_count++; + map->stats.wired_count++; /* * Zero the page. 
@@ -2205,14 +2601,17 @@ pmap_expand( /* * See if someone else expanded us first */ - if (pmap_pte(map, v) != PT_ENTRY_NULL) { + if (pmap64_pdpt(map, vaddr) != PDPT_ENTRY_NULL) { PMAP_READ_UNLOCK(map, spl); - vm_object_lock(map->pm_obj); + vm_object_lock(map->pm_obj_pml4); vm_page_lock_queues(); vm_page_free(m); inuse_ptepages_count--; + map->stats.resident_count--; + map->stats.wired_count--; + vm_page_unlock_queues(); - vm_object_unlock(map->pm_obj); + vm_object_unlock(map->pm_obj_pml4); return; } @@ -2222,120 +2621,317 @@ pmap_expand( * set several page directory entries. */ - pdp = &map->dirbase[pdenum(map, v)]; - *pdp = pa_to_pte(pa) - | INTEL_PTE_VALID - | INTEL_PTE_USER - | INTEL_PTE_WRITE; + pml4p = pmap64_pml4(map, vaddr); /* refetch under lock */ - PMAP_READ_UNLOCK(map, spl); - return; -} + pmap_store_pte(pml4p, pa_to_pte(pa) + | INTEL_PTE_VALID + | INTEL_PTE_USER + | INTEL_PTE_WRITE); -/* - * Copy the range specified by src_addr/len - * from the source map to the range dst_addr/len - * in the destination map. - * - * This routine is only advisory and need not do anything. - */ -#if 0 -void -pmap_copy( - pmap_t dst_pmap, - pmap_t src_pmap, - vm_offset_t dst_addr, - vm_size_t len, - vm_offset_t src_addr) -{ -#ifdef lint - dst_pmap++; src_pmap++; dst_addr++; len++; src_addr++; -#endif /* lint */ -} -#endif/* 0 */ + PMAP_READ_UNLOCK(map, spl); -/* - * pmap_sync_page_data_phys(ppnum_t pa) - * - * Invalidates all of the instruction cache on a physical page and - * pushes any dirty data from the data cache for the same physical page - * Not required in i386. - */ -void -pmap_sync_page_data_phys(__unused ppnum_t pa) -{ return; + } -/* - * pmap_sync_page_attributes_phys(ppnum_t pa) - * - * Write back and invalidate all cachelines on a physical page. 
- */ void -pmap_sync_page_attributes_phys(ppnum_t pa) +pmap_expand_pdpt( + pmap_t map, + vm_map_offset_t vaddr) { - cache_flush_page_phys(pa); -} + register vm_page_t m; + register pmap_paddr_t pa; + uint64_t i; + spl_t spl; + ppnum_t pn; + pdpt_entry_t *pdptp; -int collect_ref; -int collect_unref; + if (kernel_pmap == map) panic("expand kernel pdpt"); -/* - * Routine: pmap_collect - * Function: - * Garbage collects the physical map system for - * pages which are no longer used. - * Success need not be guaranteed -- that is, there - * may well be pages which are not referenced, but - * others may be collected. - * Usage: - * Called by the pageout daemon when pages are scarce. - */ -void -pmap_collect( - pmap_t p) -{ - register pt_entry_t *pdp, *ptp; - pt_entry_t *eptp; - int wired; - spl_t spl; + spl = splhigh(); + while ((pdptp = pmap64_pdpt(map, vaddr)) == PDPT_ENTRY_NULL) { + splx(spl); + pmap_expand_pml4(map, vaddr); /* need room for another pdpt entry */ + spl = splhigh(); + } + splx(spl); - if (p == PMAP_NULL) - return; - if (p == kernel_pmap) - return; + /* + * Allocate a VM page for the pdpt page + */ + while ((m = vm_page_grab()) == VM_PAGE_NULL) + VM_PAGE_WAIT(); /* - * Garbage collect map. + * put the page into the pmap's obj list so it + * can be found later. 
*/ - PMAP_READ_LOCK(p, spl); + pn = m->phys_page; + pa = i386_ptob(pn); + i = pdptidx(map, vaddr); - for (pdp = (pt_entry_t *)p->dirbase; - pdp < (pt_entry_t *)&p->dirbase[(UMAXPTDI+1)]; - pdp++) - { - if (*pdp & INTEL_PTE_VALID) { - if(*pdp & INTEL_PTE_REF) { - *pdp &= ~INTEL_PTE_REF; - collect_ref++; - } else { - collect_unref++; - ptp = pmap_pte(p, pdetova(pdp - (pt_entry_t *)p->dirbase)); - eptp = ptp + NPTEPG; + vm_object_lock(map->pm_obj_pdpt); +#if 0 /* DEBUG */ + if (0 != vm_page_lookup(map->pm_obj_pdpt, (vm_object_offset_t)i)) { + kprintf("pmap_expand_pdpt: obj_pdpt not empty, pmap 0x%x pm_obj_pdpt 0x%x vaddr 0x%llx i 0x%llx\n", + map, map->pm_obj_pdpt, vaddr, i); + } +#endif + vm_page_insert(m, map->pm_obj_pdpt, (vm_object_offset_t)i); - /* - * If the pte page has any wired mappings, we cannot - * free it. - */ - wired = 0; - { - register pt_entry_t *ptep; - for (ptep = ptp; ptep < eptp; ptep++) { - if (iswired(*ptep)) { - wired = 1; - break; + vm_page_lock_queues(); + vm_page_wire(m); + + vm_page_unlock_queues(); + vm_object_unlock(map->pm_obj_pdpt); + inuse_ptepages_count++; + map->stats.resident_count++; + map->stats.wired_count++; + + /* + * Zero the page. + */ + pmap_zero_page(pn); + + PMAP_READ_LOCK(map, spl); + /* + * See if someone else expanded us first + */ + if (pmap64_pde(map, vaddr) != PD_ENTRY_NULL) { + PMAP_READ_UNLOCK(map, spl); + vm_object_lock(map->pm_obj_pdpt); + vm_page_lock_queues(); + vm_page_free(m); + inuse_ptepages_count--; + map->stats.resident_count--; + map->stats.wired_count--; + + vm_page_unlock_queues(); + vm_object_unlock(map->pm_obj_pdpt); + return; + } + + /* + * Set the page directory entry for this page table. + * If we have allocated more than one hardware page, + * set several page directory entries. 
+ */ + + pdptp = pmap64_pdpt(map, vaddr); /* refetch under lock */ + + pmap_store_pte(pdptp, pa_to_pte(pa) + | INTEL_PTE_VALID + | INTEL_PTE_USER + | INTEL_PTE_WRITE); + + PMAP_READ_UNLOCK(map, spl); + + return; + +} + + + +/* + * Routine: pmap_expand + * + * Expands a pmap to be able to map the specified virtual address. + * + * Allocates new virtual memory for the P0 or P1 portion of the + * pmap, then re-maps the physical pages that were in the old + * pmap to be in the new pmap. + * + * Must be called with the pmap system and the pmap unlocked, + * since these must be unlocked to use vm_allocate or vm_deallocate. + * Thus it must be called in a loop that checks whether the map + * has been expanded enough. + * (We won't loop forever, since page tables aren't shrunk.) + */ +void +pmap_expand( + pmap_t map, + vm_map_offset_t vaddr) +{ + pt_entry_t *pdp; + register vm_page_t m; + register pmap_paddr_t pa; + uint64_t i; + spl_t spl; + ppnum_t pn; + + /* + * if not the kernel map (while we are still compat kernel mode) + * and we are 64 bit, propagate expand upwards + */ + + if (cpu_64bit && (map != kernel_pmap)) { + spl = splhigh(); + while ((pdp = pmap64_pde(map, vaddr)) == PD_ENTRY_NULL) { + splx(spl); + pmap_expand_pdpt(map, vaddr); /* need room for another pde entry */ + spl = splhigh(); + } + splx(spl); + } else { + pdp = pmap_pde(map, vaddr); + } + + + /* + * Allocate a VM page for the pde entries. + */ + while ((m = vm_page_grab()) == VM_PAGE_NULL) + VM_PAGE_WAIT(); + + /* + * put the page into the pmap's obj list so it + * can be found later. 
+ */ + pn = m->phys_page; + pa = i386_ptob(pn); + i = pdeidx(map, vaddr); + + vm_object_lock(map->pm_obj); +#if 0 /* DEBUG */ + if (0 != vm_page_lookup(map->pm_obj, (vm_object_offset_t)i)) { + kprintf("pmap_expand: obj not empty, pmap 0x%x pm_obj 0x%x vaddr 0x%llx i 0x%llx\n", + map, map->pm_obj, vaddr, i); + } +#endif + vm_page_insert(m, map->pm_obj, (vm_object_offset_t)i); + + vm_page_lock_queues(); + vm_page_wire(m); + inuse_ptepages_count++; + + vm_page_unlock_queues(); + vm_object_unlock(map->pm_obj); + + /* + * Zero the page. + */ + pmap_zero_page(pn); + + PMAP_READ_LOCK(map, spl); + /* + * See if someone else expanded us first + */ + if (pmap_pte(map, vaddr) != PT_ENTRY_NULL) { + PMAP_READ_UNLOCK(map, spl); + vm_object_lock(map->pm_obj); + + vm_page_lock_queues(); + vm_page_free(m); + inuse_ptepages_count--; + + vm_page_unlock_queues(); + vm_object_unlock(map->pm_obj); + return; + } + + pdp = pmap_pde(map, vaddr); /* refetch while locked */ + + /* + * Set the page directory entry for this page table. + * If we have allocated more than one hardware page, + * set several page directory entries. + */ + + pmap_store_pte(pdp, pa_to_pte(pa) + | INTEL_PTE_VALID + | INTEL_PTE_USER + | INTEL_PTE_WRITE); + + + PMAP_READ_UNLOCK(map, spl); + + return; +} + + +/* + * pmap_sync_page_data_phys(ppnum_t pa) + * + * Invalidates all of the instruction cache on a physical page and + * pushes any dirty data from the data cache for the same physical page + * Not required in i386. + */ +void +pmap_sync_page_data_phys(__unused ppnum_t pa) +{ + return; +} + +/* + * pmap_sync_page_attributes_phys(ppnum_t pa) + * + * Write back and invalidate all cachelines on a physical page. + */ +void +pmap_sync_page_attributes_phys(ppnum_t pa) +{ + cache_flush_page_phys(pa); +} + +int collect_ref; +int collect_unref; + +/* + * Routine: pmap_collect + * Function: + * Garbage collects the physical map system for + * pages which are no longer used. 
+ * Success need not be guaranteed -- that is, there + * may well be pages which are not referenced, but + * others may be collected. + * Usage: + * Called by the pageout daemon when pages are scarce. + */ +void +pmap_collect( + pmap_t p) +{ + register pt_entry_t *pdp, *ptp; + pt_entry_t *eptp; + int wired; + spl_t spl; + + if (p == PMAP_NULL) + return; + + if (p == kernel_pmap) + return; + + /* + * Garbage collect map. + */ + PMAP_READ_LOCK(p, spl); + + for (pdp = (pt_entry_t *)p->dirbase; + pdp < (pt_entry_t *)&p->dirbase[(UMAXPTDI+1)]; + pdp++) + { + if (*pdp & INTEL_PTE_VALID) { + if(*pdp & INTEL_PTE_REF) { + pmap_store_pte(pdp, *pdp & ~INTEL_PTE_REF); + collect_ref++; + } else { + collect_unref++; + ptp = pmap_pte(p, pdetova(pdp - (pt_entry_t *)p->dirbase)); + eptp = ptp + NPTEPG; + + /* + * If the pte page has any wired mappings, we cannot + * free it. + */ + wired = 0; + { + register pt_entry_t *ptep; + for (ptep = ptp; ptep < eptp; ptep++) { + if (iswired(*ptep)) { + wired = 1; + break; } } } @@ -2351,7 +2947,7 @@ pmap_collect( /* * Invalidate the page directory pointer. */ - *pdp = 0x0; + pmap_store_pte(pdp, 0x0); PMAP_READ_UNLOCK(p, spl); @@ -2378,32 +2974,21 @@ pmap_collect( } } PMAP_UPDATE_TLBS(p, VM_MIN_ADDRESS, VM_MAX_ADDRESS); + PMAP_READ_UNLOCK(p, spl); return; } -/* - * Routine: pmap_kernel - * Function: - * Returns the physical map handle for the kernel. 
- */ -#if 0 -pmap_t -pmap_kernel(void) -{ - return (kernel_pmap); -} -#endif/* 0 */ void pmap_copy_page(src, dst) ppnum_t src; ppnum_t dst; { - bcopy_phys((addr64_t)i386_ptob(src), - (addr64_t)i386_ptob(dst), - PAGE_SIZE); + bcopy_phys((addr64_t)i386_ptob(src), + (addr64_t)i386_ptob(dst), + PAGE_SIZE); } @@ -2424,8 +3009,8 @@ pmap_copy_page(src, dst) void pmap_pageable( __unused pmap_t pmap, - __unused vm_offset_t start_addr, - __unused vm_offset_t end_addr, + __unused vm_map_offset_t start_addr, + __unused vm_map_offset_t end_addr, __unused boolean_t pageable) { #ifdef lint @@ -2485,7 +3070,7 @@ phys_attribute_clear( simple_lock(&pmap->lock); { - register vm_offset_t va; + register vm_map_offset_t va; va = pv_e->va; pte = pmap_pte(pmap, va); @@ -2502,7 +3087,8 @@ phys_attribute_clear( * Clear modify or reference bits. */ - *pte++ &= ~bits; + pmap_store_pte(pte, *pte & ~bits); + pte++; PMAP_UPDATE_TLBS(pmap, va, va + PAGE_SIZE); } simple_unlock(&pmap->lock); @@ -2539,15 +3125,24 @@ phys_attribute_test( return (FALSE); } + phys = i386_ptob(pn); + pai = pa_index(phys); + /* + * super fast check... if bits already collected + * no need to take any locks... + * if not set, we need to recheck after taking + * the lock in case they got pulled in while + * we were waiting for the lock + */ + if (pmap_phys_attributes[pai] & bits) + return (TRUE); + pv_h = pai_to_pvh(pai); + /* * Lock the pmap system first, since we will be checking * several pmaps. 
*/ - PMAP_WRITE_LOCK(spl); - phys = i386_ptob(pn); - pai = pa_index(phys); - pv_h = pai_to_pvh(pai); if (pmap_phys_attributes[pai] & bits) { PMAP_WRITE_UNLOCK(spl); @@ -2572,7 +3167,7 @@ phys_attribute_test( simple_lock(&pmap->lock); { - register vm_offset_t va; + register vm_map_offset_t va; va = pv_e->va; pte = pmap_pte(pmap, va); @@ -2738,45 +3333,49 @@ pmap_clear_refmod(ppnum_t pa, unsigned int mask) void pmap_modify_pages( pmap_t map, - vm_offset_t s, - vm_offset_t e) + vm_map_offset_t sva, + vm_map_offset_t eva) { spl_t spl; register pt_entry_t *pde; register pt_entry_t *spte, *epte; - vm_offset_t l; - vm_offset_t orig_s = s; + vm_map_offset_t lva; + vm_map_offset_t orig_sva; if (map == PMAP_NULL) return; PMAP_READ_LOCK(map, spl); - pde = pmap_pde(map, s); - while (s && s < e) { - l = (s + PDE_MAPPED_SIZE) & ~(PDE_MAPPED_SIZE-1); - if (l > e) - l = e; - if (*pde & INTEL_PTE_VALID) { - spte = (pt_entry_t *)pmap_pte(map, (s & ~(PDE_MAPPED_SIZE-1))); - if (l) { - spte = &spte[ptenum(s)]; - epte = &spte[intel_btop(l-s)]; + orig_sva = sva; + while (sva && sva < eva) { + lva = (sva + pde_mapped_size) & ~(pde_mapped_size-1); + if (lva > eva) + lva = eva; + pde = pmap_pde(map, sva); + if (pde && (*pde & INTEL_PTE_VALID)) { + spte = (pt_entry_t *)pmap_pte(map, (sva & ~(pde_mapped_size-1))); + if (lva) { + spte = &spte[ptenum(sva)]; + epte = &spte[intel_btop(lva-sva)]; } else { - epte = &spte[intel_btop(PDE_MAPPED_SIZE)]; - spte = &spte[ptenum(s)]; + epte = &spte[intel_btop(pde_mapped_size)]; + spte = &spte[ptenum(sva)]; } while (spte < epte) { if (*spte & INTEL_PTE_VALID) { - *spte |= (INTEL_PTE_MOD | INTEL_PTE_WRITE); + pmap_store_pte(spte, *spte + | INTEL_PTE_MOD + | INTEL_PTE_WRITE); } spte++; } } - s = l; + sva = lva; pde++; } - PMAP_UPDATE_TLBS(map, orig_s, e); + PMAP_UPDATE_TLBS(map, orig_sva, eva); + PMAP_READ_UNLOCK(map, spl); } @@ -2796,218 +3395,14 @@ flush_dcache(__unused vm_offset_t addr, return; } -/* -* TLB Coherence Code (TLB "shootdown" code) -* -* 
Threads that belong to the same task share the same address space and -* hence share a pmap. However, they may run on distinct cpus and thus -* have distinct TLBs that cache page table entries. In order to guarantee -* the TLBs are consistent, whenever a pmap is changed, all threads that -* are active in that pmap must have their TLB updated. To keep track of -* this information, the set of cpus that are currently using a pmap is -* maintained within each pmap structure (cpus_using). Pmap_activate() and -* pmap_deactivate add and remove, respectively, a cpu from this set. -* Since the TLBs are not addressable over the bus, each processor must -* flush its own TLB; a processor that needs to invalidate another TLB -* needs to interrupt the processor that owns that TLB to signal the -* update. -* -* Whenever a pmap is updated, the lock on that pmap is locked, and all -* cpus using the pmap are signaled to invalidate. All threads that need -* to activate a pmap must wait for the lock to clear to await any updates -* in progress before using the pmap. They must ACQUIRE the lock to add -* their cpu to the cpus_using set. An implicit assumption made -* throughout the TLB code is that all kernel code that runs at or higher -* than splvm blocks out update interrupts, and that such code does not -* touch pageable pages. -* -* A shootdown interrupt serves another function besides signaling a -* processor to invalidate. The interrupt routine (pmap_update_interrupt) -* waits for the both the pmap lock (and the kernel pmap lock) to clear, -* preventing user code from making implicit pmap updates while the -* sending processor is performing its update. (This could happen via a -* user data write reference that turns on the modify bit in the page -* table). It must wait for any kernel updates that may have started -* concurrently with a user pmap update because the IPC code -* changes mappings. 
-* Spinning on the VALUES of the locks is sufficient (rather than -* having to acquire the locks) because any updates that occur subsequent -* to finding the lock unlocked will be signaled via another interrupt. -* (This assumes the interrupt is cleared before the low level interrupt code -* calls pmap_update_interrupt()). -* -* The signaling processor must wait for any implicit updates in progress -* to terminate before continuing with its update. Thus it must wait for an -* acknowledgement of the interrupt from each processor for which such -* references could be made. For maintaining this information, a set -* cpus_active is used. A cpu is in this set if and only if it can -* use a pmap. When pmap_update_interrupt() is entered, a cpu is removed from -* this set; when all such cpus are removed, it is safe to update. -* -* Before attempting to acquire the update lock on a pmap, a cpu (A) must -* be at least at the priority of the interprocessor interrupt -* (splip<=splvm). Otherwise, A could grab a lock and be interrupted by a -* kernel update; it would spin forever in pmap_update_interrupt() trying -* to acquire the user pmap lock it had already acquired. Furthermore A -* must remove itself from cpus_active. Otherwise, another cpu holding -* the lock (B) could be in the process of sending an update signal to A, -* and thus be waiting for A to remove itself from cpus_active. If A is -* spinning on the lock at priority this will never happen and a deadlock -* will result. 
-*/ - -/* - * Signal another CPU that it must flush its TLB - */ -void -signal_cpus( - cpu_set use_list, - pmap_t pmap, - vm_offset_t start_addr, - vm_offset_t end_addr) -{ - register int which_cpu, j; - register pmap_update_list_t update_list_p; - - while ((which_cpu = ffs((unsigned long)use_list)) != 0) { - which_cpu -= 1; /* convert to 0 origin */ +#if MACH_KDB - update_list_p = cpu_update_list(which_cpu); - simple_lock(&update_list_p->lock); +/* show phys page mappings and attributes */ - j = update_list_p->count; - if (j >= UPDATE_LIST_SIZE) { - /* - * list overflowed. Change last item to - * indicate overflow. - */ - update_list_p->item[UPDATE_LIST_SIZE-1].pmap = kernel_pmap; - update_list_p->item[UPDATE_LIST_SIZE-1].start = VM_MIN_ADDRESS; - update_list_p->item[UPDATE_LIST_SIZE-1].end = VM_MAX_KERNEL_ADDRESS; - } - else { - update_list_p->item[j].pmap = pmap; - update_list_p->item[j].start = start_addr; - update_list_p->item[j].end = end_addr; - update_list_p->count = j+1; - } - cpu_update_needed(which_cpu) = TRUE; - simple_unlock(&update_list_p->lock); - - /* if its the kernel pmap, ignore cpus_idle */ - if (((cpus_idle & (1 << which_cpu)) == 0) || - (pmap == kernel_pmap) || PMAP_REAL(which_cpu) == pmap) - { - i386_signal_cpu(which_cpu, MP_TLB_FLUSH, ASYNC); - } - use_list &= ~(1 << which_cpu); - } -} +extern void db_show_page(pmap_paddr_t pa); void -process_pmap_updates( - register pmap_t my_pmap) -{ - register int my_cpu; - register pmap_update_list_t update_list_p; - register int j; - register pmap_t pmap; - - mp_disable_preemption(); - my_cpu = cpu_number(); - update_list_p = cpu_update_list(my_cpu); - simple_lock(&update_list_p->lock); - - for (j = 0; j < update_list_p->count; j++) { - pmap = update_list_p->item[j].pmap; - if (pmap == my_pmap || - pmap == kernel_pmap) { - - if (pmap->ref_count <= 0) { - PMAP_CPU_CLR(pmap, my_cpu); - PMAP_REAL(my_cpu) = kernel_pmap; -#ifdef PAE - set_cr3((unsigned int)kernel_pmap->pm_ppdpt); -#else - set_cr3((unsigned 
int)kernel_pmap->pdirbase); -#endif - } else - INVALIDATE_TLB(pmap, - update_list_p->item[j].start, - update_list_p->item[j].end); - } - } - update_list_p->count = 0; - cpu_update_needed(my_cpu) = FALSE; - simple_unlock(&update_list_p->lock); - mp_enable_preemption(); -} - -/* - * Interrupt routine for TBIA requested from other processor. - * This routine can also be called at all interrupts time if - * the cpu was idle. Some driver interrupt routines might access - * newly allocated vm. (This is the case for hd) - */ -void -pmap_update_interrupt(void) -{ - register int my_cpu; - spl_t s; - register pmap_t my_pmap; - - mp_disable_preemption(); - my_cpu = cpu_number(); - - /* - * Raise spl to splvm (above splip) to block out pmap_extract - * from IO code (which would put this cpu back in the active - * set). - */ - s = splhigh(); - - my_pmap = PMAP_REAL(my_cpu); - - if (!(my_pmap && pmap_in_use(my_pmap, my_cpu))) - my_pmap = kernel_pmap; - - do { - LOOP_VAR; - - /* - * Indicate that we're not using either user or kernel - * pmap. - */ - i_bit_clear(my_cpu, &cpus_active); - - /* - * Wait for any pmap updates in progress, on either user - * or kernel pmap. 
- */ - while (*(volatile int *)(&my_pmap->lock.interlock.lock_data) || - *(volatile int *)(&kernel_pmap->lock.interlock.lock_data)) { - LOOP_CHECK("pmap_update_interrupt", my_pmap); - cpu_pause(); - } - - process_pmap_updates(my_pmap); - - i_bit_set(my_cpu, &cpus_active); - - } while (cpu_update_needed(my_cpu)); - - splx(s); - mp_enable_preemption(); -} - -#if MACH_KDB - -/* show phys page mappings and attributes */ - -extern void db_show_page(pmap_paddr_t pa); - -void -db_show_page(pmap_paddr_t pa) +db_show_page(pmap_paddr_t pa) { pv_entry_t pv_h; int pai; @@ -3044,7 +3439,7 @@ void db_kvtophys( vm_offset_t vaddr) { - db_printf("0x%x", kvtophys(vaddr)); + db_printf("0x%qx", kvtophys(vaddr)); } /* @@ -3055,7 +3450,7 @@ db_show_vaddrs( pt_entry_t *dirbase) { pt_entry_t *ptep, *pdep, tmp; - int x, y, pdecnt, ptecnt; + unsigned int x, y, pdecnt, ptecnt; if (dirbase == 0) { dirbase = kernel_pmap->dirbase; @@ -3064,7 +3459,7 @@ db_show_vaddrs( db_printf("need a dirbase...\n"); return; } - dirbase = (pt_entry_t *) ((unsigned long) dirbase & ~INTEL_OFFMASK); + dirbase = (pt_entry_t *) (int) ((unsigned long) dirbase & ~INTEL_OFFMASK); db_printf("dirbase: 0x%x\n", dirbase); @@ -3109,153 +3504,24 @@ pmap_list_resident_pages( } #endif /* MACH_VM_DEBUG */ -#ifdef MACH_BSD -/* - * pmap_pagemove - * - * BSD support routine to reassign virtual addresses. 
- */ - -void -pmap_movepage(unsigned long from, unsigned long to, vm_size_t size) -{ - spl_t spl; - pt_entry_t *pte, saved_pte; - - /* Lock the kernel map */ - PMAP_READ_LOCK(kernel_pmap, spl); - - - while (size > 0) { - pte = pmap_pte(kernel_pmap, from); - if (pte == NULL) - panic("pmap_pagemove from pte NULL"); - saved_pte = *pte; - PMAP_READ_UNLOCK(kernel_pmap, spl); - - pmap_enter(kernel_pmap, to, (ppnum_t)i386_btop(i386_trunc_page(*pte)), - VM_PROT_READ|VM_PROT_WRITE, 0, *pte & INTEL_PTE_WIRED); - - pmap_remove(kernel_pmap, (addr64_t)from, (addr64_t)(from+PAGE_SIZE)); - PMAP_READ_LOCK(kernel_pmap, spl); - pte = pmap_pte(kernel_pmap, to); - if (pte == NULL) - panic("pmap_pagemove 'to' pte NULL"); - - *pte = saved_pte; - - from += PAGE_SIZE; - to += PAGE_SIZE; - size -= PAGE_SIZE; - } - - /* Get the processors to update the TLBs */ - PMAP_UPDATE_TLBS(kernel_pmap, from, from+size); - PMAP_UPDATE_TLBS(kernel_pmap, to, to+size); - - PMAP_READ_UNLOCK(kernel_pmap, spl); - -} -#endif /* MACH_BSD */ /* temporary workaround */ boolean_t -coredumpok(vm_map_t map, vm_offset_t va) +coredumpok(__unused vm_map_t map, __unused vm_offset_t va) { +#if 0 pt_entry_t *ptep; ptep = pmap_pte(map->pmap, va); if (0 == ptep) return FALSE; return ((*ptep & (INTEL_PTE_NCACHE | INTEL_PTE_WIRED)) != (INTEL_PTE_NCACHE | INTEL_PTE_WIRED)); -} - -/* - * grow the number of kernel page table entries, if needed - */ -void -pmap_growkernel(vm_offset_t addr) -{ -#if GROW_KERNEL_FUNCTION_IMPLEMENTED - struct pmap *pmap; - int s; - vm_offset_t ptppaddr; - ppnum_t ppn; - vm_page_t nkpg; - pd_entry_t newpdir = 0; - - /* - * Serialize. - * Losers return to try again until the winner completes the work. 
- */ - if (kptobj == 0) panic("growkernel 0"); - if (!vm_object_lock_try(kptobj)) { - return; - } - - vm_page_lock_queues(); - - s = splhigh(); - - /* - * If this is the first time thru, locate the end of the - * kernel page table entries and set nkpt to the current - * number of kernel page table pages - */ - - if (kernel_vm_end == 0) { - kernel_vm_end = KERNBASE; - nkpt = 0; - - while (pdir_pde(kernel_pmap->dirbase, kernel_vm_end)) { - kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1); - nkpt++; - } - } - - /* - * Now allocate and map the required number of page tables - */ - addr = (addr + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1); - while (kernel_vm_end < addr) { - if (pdir_pde(kernel_pmap->dirbase, kernel_vm_end)) { - kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1); - continue; /* someone already filled this one */ - } - - nkpg = vm_page_alloc(kptobj, nkpt); - if (!nkpg) - panic("pmap_growkernel: no memory to grow kernel"); - - nkpt++; - vm_page_wire(nkpg); - ppn = nkpg->phys_page; - pmap_zero_page(ppn); - ptppaddr = i386_ptob(ppn); - newpdir = (pd_entry_t) (ptppaddr | INTEL_PTE_VALID | - INTEL_PTE_RW | INTEL_PTE_REF | INTEL_PTE_MOD); - pdir_pde(kernel_pmap->dirbase, kernel_vm_end) = newpdir; - - simple_lock(&free_pmap_lock); - for (pmap = (struct pmap *)kernel_pmap->pmap_link.next; - pmap != kernel_pmap ; - pmap = (struct pmap *)pmap->pmap_link.next ) { - *pmap_pde(pmap, kernel_vm_end) = newpdir; - } - simple_unlock(&free_pmap_lock); - } - splx(s); - vm_page_unlock_queues(); - vm_object_unlock(kptobj); +#else + return TRUE; #endif } -pt_entry_t * -pmap_mapgetpte(vm_map_t map, vm_offset_t v) -{ - return pmap_pte(map->pmap, v); -} boolean_t phys_page_exists( @@ -3312,27 +3578,88 @@ mapping_adjust() } void -pmap_commpage_init(vm_offset_t kernel_commpage, vm_offset_t user_commpage, int cnt) +pmap_commpage32_init(vm_offset_t kernel_commpage, vm_offset_t user_commpage, int cnt) { int i; 
pt_entry_t *opte, *npte; pt_entry_t pte; + for (i = 0; i < cnt; i++) { - opte = pmap_pte(kernel_pmap, kernel_commpage); + opte = pmap_pte(kernel_pmap, (vm_map_offset_t)kernel_commpage); if (0 == opte) panic("kernel_commpage"); - npte = pmap_pte(kernel_pmap, user_commpage); - if (0 == npte) panic("user_commpage"); pte = *opte | INTEL_PTE_USER|INTEL_PTE_GLOBAL; pte &= ~INTEL_PTE_WRITE; // ensure read only - WRITE_PTE_FAST(npte, pte); + npte = pmap_pte(kernel_pmap, (vm_map_offset_t)user_commpage); + if (0 == npte) panic("user_commpage"); + pmap_store_pte(npte, pte); kernel_commpage += INTEL_PGBYTES; user_commpage += INTEL_PGBYTES; } } +#define PMAP_COMMPAGE64_CNT (_COMM_PAGE64_AREA_USED/PAGE_SIZE) +pt_entry_t pmap_commpage64_ptes[PMAP_COMMPAGE64_CNT]; + +void +pmap_commpage64_init(vm_offset_t kernel_commpage, __unused vm_map_offset_t user_commpage, int cnt) +{ + spl_t s; + int i; + pt_entry_t *kptep; + + s = splhigh(); + for (i = 0; i< cnt; i++) { + kptep = pmap_pte(kernel_pmap, (uint64_t)kernel_commpage + (i*PAGE_SIZE)); + if ((0 == kptep) || (0 == (*kptep & INTEL_PTE_VALID))) panic("pmap_commpage64_init pte"); + pmap_commpage64_ptes[i] = ((*kptep & ~INTEL_PTE_WRITE) | INTEL_PTE_USER); + } + splx(s); + +} + +void +pmap_map_sharedpage(__unused task_t task, pmap_t p) +{ + pt_entry_t *ptep; + spl_t s; + int i; + + if (!p->pm_64bit) return; + /* setup high 64 bit commpage */ + s = splhigh(); + while ((ptep = pmap_pte(p, (uint64_t)_COMM_PAGE64_BASE_ADDRESS)) == PD_ENTRY_NULL) { + splx(s); + pmap_expand(p, (uint64_t)_COMM_PAGE64_BASE_ADDRESS); + s = splhigh(); + } + + for (i = 0; i< PMAP_COMMPAGE64_CNT; i++) { + ptep = pmap_pte(p, (uint64_t)_COMM_PAGE64_BASE_ADDRESS + (i*PAGE_SIZE)); + if (0 == ptep) panic("pmap_map_sharedpage"); + pmap_store_pte(ptep, pmap_commpage64_ptes[i]); + } + splx(s); + +} + +void +pmap_unmap_sharedpage(pmap_t pmap) +{ + spl_t s; + pt_entry_t *ptep; + int i; + + if (!pmap->pm_64bit) return; + s = splhigh(); + for (i = 0; i< PMAP_COMMPAGE64_CNT; 
i++) { + ptep = pmap_pte(pmap, (uint64_t)_COMM_PAGE64_BASE_ADDRESS + (i*PAGE_SIZE)); + if (ptep) pmap_store_pte(ptep, 0); + } + splx(s); +} + static cpu_pmap_t cpu_pmap_master; -static struct pmap_update_list cpu_update_list_master; struct cpu_pmap * pmap_cpu_alloc(boolean_t is_boot_cpu) @@ -3340,13 +3667,13 @@ pmap_cpu_alloc(boolean_t is_boot_cpu) int ret; int i; cpu_pmap_t *cp; - pmap_update_list_t up; vm_offset_t address; + vm_map_address_t mapaddr; vm_map_entry_t entry; + pt_entry_t *pte; if (is_boot_cpu) { cp = &cpu_pmap_master; - up = &cpu_update_list_master; } else { /* * The per-cpu pmap data structure itself. @@ -3360,46 +3687,31 @@ pmap_cpu_alloc(boolean_t is_boot_cpu) bzero((void *)cp, sizeof(cpu_pmap_t)); /* - * The tlb flush update list. + * The temporary windows used for copy/zero - see loose_ends.c */ - ret = kmem_alloc(kernel_map, - (vm_offset_t *) &up, sizeof(*up)); + ret = vm_map_find_space(kernel_map, + &mapaddr, PMAP_NWINDOWS*PAGE_SIZE, (vm_map_offset_t)0, 0, &entry); if (ret != KERN_SUCCESS) { - printf("pmap_cpu_alloc() failed ret=%d\n", ret); + printf("pmap_cpu_alloc() " + "vm_map_find_space ret=%d\n", ret); pmap_cpu_free(cp); return NULL; } + address = (vm_offset_t)mapaddr; - /* - * The temporary windows used for copy/zero - see loose_ends.c - */ - for (i = 0; i < PMAP_NWINDOWS; i++) { - ret = vm_map_find_space(kernel_map, - &address, PAGE_SIZE, 0, &entry); - if (ret != KERN_SUCCESS) { - printf("pmap_cpu_alloc() " - "vm_map_find_space ret=%d\n", ret); - pmap_cpu_free(cp); - return NULL; - } - vm_map_unlock(kernel_map); - + for (i = 0; i < PMAP_NWINDOWS; i++, address += PAGE_SIZE) { + while ((pte = pmap_pte(kernel_pmap, (vm_map_offset_t)address)) == 0) + pmap_expand(kernel_pmap, (vm_map_offset_t)address); + * (int *) pte = 0; cp->mapwindow[i].prv_CADDR = (caddr_t) address; - cp->mapwindow[i].prv_CMAP = vtopte(address); - * (int *) cp->mapwindow[i].prv_CMAP = 0; - - kprintf("pmap_cpu_alloc() " - "window=%d CADDR=0x%x CMAP=0x%x\n", - i, address, 
vtopte(address)); + cp->mapwindow[i].prv_CMAP = pte; } + vm_map_unlock(kernel_map); } - /* - * Set up the pmap request list - */ - cp->update_list = up; - simple_lock_init(&up->lock, 0); - up->count = 0; + cp->pdpt_window_index = PMAP_PDPT_FIRST_WINDOW; + cp->pde_window_index = PMAP_PDE_FIRST_WINDOW; + cp->pte_window_index = PMAP_PTE_FIRST_WINDOW; return cp; } @@ -3408,9 +3720,458 @@ void pmap_cpu_free(struct cpu_pmap *cp) { if (cp != NULL && cp != &cpu_pmap_master) { - if (cp->update_list != NULL) - kfree((void *) cp->update_list, - sizeof(*cp->update_list)); kfree((void *) cp, sizeof(cpu_pmap_t)); } } + + +mapwindow_t * +pmap_get_mapwindow(pt_entry_t pentry) +{ + mapwindow_t *mp; + int i; + boolean_t istate; + + /* + * can be called from hardware interrupt context + * so we need to protect the lookup process + */ + istate = ml_set_interrupts_enabled(FALSE); + + /* + * Note: 0th map reserved for pmap_pte() + */ + for (i = PMAP_NWINDOWS_FIRSTFREE; i < PMAP_NWINDOWS; i++) { + mp = ¤t_cpu_datap()->cpu_pmap->mapwindow[i]; + + if (*mp->prv_CMAP == 0) { + *mp->prv_CMAP = pentry; + break; + } + } + if (i >= PMAP_NWINDOWS) + mp = NULL; + (void) ml_set_interrupts_enabled(istate); + + return (mp); +} + + +/* + * kern_return_t pmap_nest(grand, subord, vstart, size) + * + * grand = the pmap that we will nest subord into + * subord = the pmap that goes into the grand + * vstart = start of range in pmap to be inserted + * nstart = start of range in pmap nested pmap + * size = Size of nest area (up to 16TB) + * + * Inserts a pmap into another. This is used to implement shared segments. + * + * on x86 this is very limited right now. must be exactly 1 segment. + * + * Note that we depend upon higher level VM locks to insure that things don't change while + * we are doing this. For example, VM should not be doing any pmap enters while it is nesting + * or do 2 nests at once. 
+ */ + + +kern_return_t pmap_nest(pmap_t grand, pmap_t subord, addr64_t vstart, addr64_t nstart, uint64_t size) { + + vm_map_offset_t vaddr, nvaddr; + pd_entry_t *pde,*npde; + unsigned int i, need_flush; + unsigned int num_pde; + spl_t s; + + // do validity tests + + if(size & 0x0FFFFFFFULL) return KERN_INVALID_VALUE; /* We can only do this for multiples of 256MB */ + if((size >> 28) > 65536) return KERN_INVALID_VALUE; /* Max size we can nest is 16TB */ + if(vstart & 0x0FFFFFFFULL) return KERN_INVALID_VALUE; /* We can only do this aligned to 256MB */ + if(nstart & 0x0FFFFFFFULL) return KERN_INVALID_VALUE; /* We can only do this aligned to 256MB */ + if(size == 0) { + panic("pmap_nest: size is invalid - %016llX\n", size); + } + if ((size >> 28) != 1) panic("pmap_nest: size 0x%llx must be 0x%x", size, NBPDE); + + // prepopulate subord pmap pde's if necessary + + if (cpu_64bit) { + s = splhigh(); + while (PD_ENTRY_NULL == (npde = pmap_pde(subord, nstart))) { + splx(s); + pmap_expand(subord, nstart); + s = splhigh(); + } + splx(s); + } + + PMAP_READ_LOCK(subord,s); + nvaddr = (vm_map_offset_t)nstart; + need_flush = 0; + num_pde = size >> PDESHIFT; + + for (i=0;i> PDESHIFT; + + for (i=0;inx_enabled = 0; +} + +void +pt_fake_zone_info(int *count, vm_size_t *cur_size, vm_size_t *max_size, vm_size_t *elem_size, + vm_size_t *alloc_size, int *collectable, int *exhaustable) +{ + *count = inuse_ptepages_count; + *cur_size = PAGE_SIZE * inuse_ptepages_count; + *max_size = PAGE_SIZE * (inuse_ptepages_count + vm_page_inactive_count + vm_page_active_count + vm_page_free_count); + *elem_size = PAGE_SIZE; + *alloc_size = PAGE_SIZE; + + *collectable = 1; + *exhaustable = 0; +} + +vm_offset_t pmap_cpu_high_map_vaddr(int cpu, enum high_cpu_types e) +{ + enum high_fixed_addresses a; + a = e + HIGH_CPU_END * cpu; + return pmap_index_to_virt(HIGH_FIXED_CPUS_BEGIN + a); +} + +vm_offset_t pmap_high_map_vaddr(enum high_cpu_types e) +{ + return pmap_cpu_high_map_vaddr(cpu_number(), e); +} + 
+vm_offset_t pmap_high_map(pt_entry_t pte, enum high_cpu_types e) +{ + enum high_fixed_addresses a; + vm_offset_t vaddr; + + a = e + HIGH_CPU_END * cpu_number(); + vaddr = (vm_offset_t)pmap_index_to_virt(HIGH_FIXED_CPUS_BEGIN + a); + *(pte_unique_base + a) = pte; + + /* TLB flush for this page for this cpu */ + invlpg((uintptr_t)vaddr); + + return vaddr; +} + + +/* + * Called with pmap locked, we: + * - scan through per-cpu data to see which other cpus need to flush + * - send an IPI to each non-idle cpu to be flushed + * - wait for all to signal back that they are inactive or we see that + * they are in an interrupt handler or at a safe point + * - flush the local tlb if active for this pmap + * - return ... the caller will unlock the pmap + */ +void +pmap_flush_tlbs(pmap_t pmap) +{ + unsigned int cpu; + unsigned int cpu_bit; + cpu_set cpus_to_signal; + unsigned int my_cpu = cpu_number(); + pmap_paddr_t pmap_cr3 = pmap->pm_cr3; + boolean_t flush_self = FALSE; + uint64_t deadline; + + assert(!ml_get_interrupts_enabled()); + + /* + * Scan other cpus for matching active or task CR3. + * For idle cpus (with no active map) we mark them invalid but + * don't signal -- they'll check as they go busy. + * Note: for the kernel pmap we look for 64-bit shared address maps. 
+ */ + cpus_to_signal = 0; + for (cpu = 0, cpu_bit = 1; cpu < real_ncpus; cpu++, cpu_bit <<= 1) { + if (!cpu_datap(cpu)->cpu_running) + continue; + if ((cpu_datap(cpu)->cpu_task_cr3 == pmap_cr3) || + (cpu_datap(cpu)->cpu_active_cr3 == pmap_cr3) || + ((pmap == kernel_pmap) && + (!CPU_CR3_IS_ACTIVE(cpu) || + cpu_datap(cpu)->cpu_task_map == TASK_MAP_64BIT_SHARED))) { + if (cpu == my_cpu) { + flush_self = TRUE; + continue; + } + cpu_datap(cpu)->cpu_tlb_invalid = TRUE; + __asm__ volatile("mfence"); + + if (CPU_CR3_IS_ACTIVE(cpu)) { + cpus_to_signal |= cpu_bit; + i386_signal_cpu(cpu, MP_TLB_FLUSH, ASYNC); + } + } + } + + if (cpus_to_signal) { + KERNEL_DEBUG(0xef800024 | DBG_FUNC_START, cpus_to_signal, 0, 0, 0, 0); + + deadline = mach_absolute_time() + LockTimeOut; + /* + * Wait for those other cpus to acknowledge + */ + for (cpu = 0, cpu_bit = 1; cpu < real_ncpus; cpu++, cpu_bit <<= 1) { + while ((cpus_to_signal & cpu_bit) != 0) { + if (!cpu_datap(cpu)->cpu_running || + cpu_datap(cpu)->cpu_tlb_invalid == FALSE || + !CPU_CR3_IS_ACTIVE(cpu)) { + cpus_to_signal &= ~cpu_bit; + break; + } + if (mach_absolute_time() > deadline) + panic("pmap_flush_tlbs() " + "timeout pmap=%p cpus_to_signal=%p", + pmap, cpus_to_signal); + cpu_pause(); + } + if (cpus_to_signal == 0) + break; + } + KERNEL_DEBUG(0xef800024 | DBG_FUNC_END, cpus_to_signal, 0, 0, 0, 0); + } + + /* + * Flush local tlb if required. + * We need this flush even if the pmap being changed + * is the user map... in case we do a copyin/out + * before returning to user mode. 
+ */ + if (flush_self) + flush_tlb(); + +} + +void +process_pmap_updates(void) +{ + flush_tlb(); + + current_cpu_datap()->cpu_tlb_invalid = FALSE; + __asm__ volatile("mfence"); +} + +void +pmap_update_interrupt(void) +{ + KERNEL_DEBUG(0xef800028 | DBG_FUNC_START, 0, 0, 0, 0, 0); + + assert(!ml_get_interrupts_enabled()); + + process_pmap_updates(); + + KERNEL_DEBUG(0xef800028 | DBG_FUNC_END, 0, 0, 0, 0, 0); +} + + +unsigned int pmap_cache_attributes(ppnum_t pn) { + + if (!pmap_valid_page(pn)) + return (VM_WIMG_IO); + + return (VM_WIMG_COPYBACK); +} + +#ifdef PMAP_DEBUG +void +pmap_dump(pmap_t p) +{ + int i; + + kprintf("pmap 0x%x\n",p); + + kprintf(" pm_cr3 0x%llx\n",p->pm_cr3); + kprintf(" pm_pml4 0x%x\n",p->pm_pml4); + kprintf(" pm_pdpt 0x%x\n",p->pm_pdpt); + + kprintf(" pml4[0] 0x%llx\n",*p->pm_pml4); + for (i=0;i<8;i++) + kprintf(" pdpt[%d] 0x%llx\n",i, p->pm_pdpt[i]); +} + +void pmap_dump_wrap(void) +{ + pmap_dump(current_cpu_datap()->cpu_active_thread->task->map->pmap); +} + +void +dump_4GB_pdpt(pmap_t p) +{ + int spl; + pdpt_entry_t *user_pdptp; + pdpt_entry_t *kern_pdptp; + pdpt_entry_t *pml4p; + + spl = splhigh(); + while ((user_pdptp = pmap64_pdpt(p, 0x0)) == PDPT_ENTRY_NULL) { + splx(spl); + pmap_expand_pml4(p, 0x0); + spl = splhigh(); + } + kern_pdptp = kernel_pmap->pm_pdpt; + if (kern_pdptp == NULL) + panic("kern_pdptp == NULL"); + kprintf("dump_4GB_pdpt(%p)\n" + "kern_pdptp=%p (phys=0x%016llx)\n" + "\t 0x%08x: 0x%016llx\n" + "\t 0x%08x: 0x%016llx\n" + "\t 0x%08x: 0x%016llx\n" + "\t 0x%08x: 0x%016llx\n" + "\t 0x%08x: 0x%016llx\n" + "user_pdptp=%p (phys=0x%016llx)\n" + "\t 0x%08x: 0x%016llx\n" + "\t 0x%08x: 0x%016llx\n" + "\t 0x%08x: 0x%016llx\n" + "\t 0x%08x: 0x%016llx\n" + "\t 0x%08x: 0x%016llx\n", + p, kern_pdptp, kvtophys(kern_pdptp), + kern_pdptp+0, *(kern_pdptp+0), + kern_pdptp+1, *(kern_pdptp+1), + kern_pdptp+2, *(kern_pdptp+2), + kern_pdptp+3, *(kern_pdptp+3), + kern_pdptp+4, *(kern_pdptp+4), + user_pdptp, kvtophys(user_pdptp), + user_pdptp+0, 
*(user_pdptp+0), + user_pdptp+1, *(user_pdptp+1), + user_pdptp+2, *(user_pdptp+2), + user_pdptp+3, *(user_pdptp+3), + user_pdptp+4, *(user_pdptp+4)); + kprintf("user pm_cr3=0x%016llx pm_hold=0x%08x pm_pml4=0x%08x\n", + p->pm_cr3, p->pm_hold, p->pm_pml4); + pml4p = (pdpt_entry_t *)p->pm_hold; + if (pml4p == NULL) + panic("user pml4p == NULL"); + kprintf("\t 0x%08x: 0x%016llx\n" + "\t 0x%08x: 0x%016llx\n", + pml4p+0, *(pml4p), + pml4p+KERNEL_UBER_PML4_INDEX, *(pml4p+KERNEL_UBER_PML4_INDEX)); + kprintf("kern pm_cr3=0x%016llx pm_hold=0x%08x pm_pml4=0x%08x\n", + kernel_pmap->pm_cr3, kernel_pmap->pm_hold, kernel_pmap->pm_pml4); + pml4p = (pdpt_entry_t *)kernel_pmap->pm_hold; + if (pml4p == NULL) + panic("kern pml4p == NULL"); + kprintf("\t 0x%08x: 0x%016llx\n" + "\t 0x%08x: 0x%016llx\n", + pml4p+0, *(pml4p), + pml4p+511, *(pml4p+511)); + splx(spl); +} + +void dump_4GB_pdpt_thread(thread_t tp) +{ + dump_4GB_pdpt(tp->map->pmap); +} + + +#endif diff --git a/osfmk/i386/pmap.h b/osfmk/i386/pmap.h index 7faa4124c..dc6f40bce 100644 --- a/osfmk/i386/pmap.h +++ b/osfmk/i386/pmap.h @@ -1,5 +1,6 @@ + /* - * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2006 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -58,7 +59,7 @@ * * Machine-dependent structures for the physical map module. 
*/ - +#ifdef KERNEL_PRIVATE #ifndef _PMAP_MACHINE_ #define _PMAP_MACHINE_ 1 @@ -74,10 +75,9 @@ #include #include #include -#define PMAP_QUEUE 1 -#ifdef PMAP_QUEUE -#include -#endif + +#include +#include /* * Define the generic in terms of the specific @@ -97,41 +97,22 @@ * i386/i486/i860 Page Table Entry */ -#ifdef PAE -typedef uint64_t pdpt_entry_t; -typedef uint64_t pt_entry_t; -typedef uint64_t pd_entry_t; -typedef uint64_t pmap_paddr_t; -#else -typedef uint32_t pt_entry_t; -typedef uint32_t pd_entry_t; -typedef uint32_t pmap_paddr_t; -#endif - -#define PT_ENTRY_NULL ((pt_entry_t *) 0) -#define PD_ENTRY_NULL ((pt_entry_t *) 0) - #endif /* ASSEMBLER */ -#ifdef PAE #define NPGPTD 4 #define PDESHIFT 21 #define PTEMASK 0x1ff #define PTEINDX 3 -#else -#define NPGPTD 1 -#define PDESHIFT 22 -#define PTEMASK 0x3ff -#define PTEINDX 2 -#endif + #define PTESHIFT 12 #define PDESIZE sizeof(pd_entry_t) /* for assembly files */ #define PTESIZE sizeof(pt_entry_t) /* for assembly files */ #define INTEL_OFFMASK (I386_PGBYTES - 1) -#define PG_FRAME (~((pmap_paddr_t)PAGE_MASK)) +#define PG_FRAME 0x000FFFFFFFFFF000ULL #define NPTEPG (PAGE_SIZE/(sizeof (pt_entry_t))) +#define NPTDPG (PAGE_SIZE/(sizeof (pd_entry_t))) #define NBPTD (NPGPTD << PAGE_SHIFT) #define NPDEPTD (NBPTD / (sizeof (pd_entry_t))) @@ -139,6 +120,82 @@ typedef uint32_t pmap_paddr_t; #define NBPDE (1 << PDESHIFT) #define PDEMASK (NBPDE - 1) + /* cleanly define parameters for all the page table levels */ +typedef uint64_t pml4_entry_t; +#define NPML4PG (PAGE_SIZE/(sizeof (pml4_entry_t))) +#define PML4SHIFT 39 +#define PML4PGSHIFT 9 +#define NBPML4 (1ULL << PML4SHIFT) +#define PML4MASK (NBPML4-1) +#define PML4_ENTRY_NULL ((pml4_entry_t *) 0) + +typedef uint64_t pdpt_entry_t; +#define NPDPTPG (PAGE_SIZE/(sizeof (pdpt_entry_t))) +#define PDPTSHIFT 30 +#define PDPTPGSHIFT 9 +#define NBPDPT (1 << PDPTSHIFT) +#define PDPTMASK (NBPDPT-1) +#define PDPT_ENTRY_NULL ((pdpt_entry_t *) 0) + +typedef uint64_t pd_entry_t; +#define 
NPDPG (PAGE_SIZE/(sizeof (pd_entry_t))) +#define PDSHIFT 21 +#define PDPGSHIFT 9 +#define NBPD (1 << PDSHIFT) +#define PDMASK (NBPD-1) +#define PD_ENTRY_NULL ((pd_entry_t *) 0) + +typedef uint64_t pt_entry_t; +#define NPTPG (PAGE_SIZE/(sizeof (pt_entry_t))) +#define PTSHIFT 12 +#define PTPGSHIFT 9 +#define NBPT (1 << PTSHIFT) +#define PTMASK (NBPT-1) +#define PT_ENTRY_NULL ((pt_entry_t *) 0) + +typedef uint64_t pmap_paddr_t; + +/* + * Atomic 64-bit store of a page table entry. + */ +static inline void +pmap_store_pte(pt_entry_t *entryp, pt_entry_t value) +{ + /* + * Load the new value into %ecx:%ebx + * Load the old value into %edx:%eax + * Compare-exchange-8bytes at address entryp (loaded in %edi) + * If the compare succeeds, the new value will have been stored. + * Otherwise, the old value changed and reloaded, so try again. + */ + asm volatile( + " movl (%0), %%eax \n\t" + " movl 4(%0), %%edx \n\t" + "1: \n\t" + " cmpxchg8b (%0) \n\t" + " jnz 1b" + : + : "D" (entryp), + "b" ((uint32_t)value), + "c" ((uint32_t)(value >> 32)) + : "eax", "edx", "memory"); +} + +/* in 64 bit spaces, the number of each type of page in the page tables */ +#define NPML4PGS (1ULL * (PAGE_SIZE/(sizeof (pml4_entry_t)))) +#define NPDPTPGS (NPML4PGS * (PAGE_SIZE/(sizeof (pdpt_entry_t)))) +#define NPDEPGS (NPDPTPGS * (PAGE_SIZE/(sizeof (pd_entry_t)))) +#define NPTEPGS (NPDEPGS * (PAGE_SIZE/(sizeof (pt_entry_t)))) + +/* + * The 64-bit kernel is remapped in uber-space which is at the base + * the highest 4th-level directory (KERNEL_UBER_PML4_INDEX). That is, + * 512GB from the top of virtual space (or zero). + */ +#define KERNEL_UBER_PML4_INDEX 511 +#define KERNEL_UBER_BASE (0ULL - NBPML4) +#define KERNEL_UBER_BASE_HI32 ((uint32_t)(KERNEL_UBER_BASE >> 32)) + #define VM_WIMG_COPYBACK VM_MEM_COHERENT #define VM_WIMG_DEFAULT VM_MEM_COHERENT /* ?? intel ?? 
*/ @@ -148,56 +205,83 @@ typedef uint32_t pmap_paddr_t; /* write combining mode, aka store gather */ #define VM_WIMG_WCOMB (VM_MEM_NOT_CACHEABLE | VM_MEM_COHERENT) +/* + * Pte related macros + */ +#define VADDR(pdi, pti) ((vm_offset_t)(((pdi)<dirbase[(vm_offset_t)(v) >> PDESHIFT]))*/ + +#define HIGH_MEM_BASE ((uint32_t)( -NBPDE) ) /* shared gdt etc seg addr */ /* XXX64 ?? */ +#define pmap_index_to_virt(x) (HIGH_MEM_BASE | ((unsigned)(x) << PAGE_SHIFT)) + /* * Convert address offset to page descriptor index */ -#define pdenum(pmap, a) (((a) >> PDESHIFT) & PDEMASK) +#define pdenum(pmap, a) (((vm_offset_t)(a) >> PDESHIFT) & PDEMASK) +#define pdeidx(pmap, a) (((a) >> PDSHIFT) & ((1ULL<<(48 - PDSHIFT)) -1)) +#define pdptidx(pmap, a) (((a) >> PDPTSHIFT) & ((1ULL<<(48 - PDPTSHIFT)) -1)) +#define pml4idx(pmap, a) (((a) >> PML4SHIFT) & ((1ULL<<(48 - PML4SHIFT)) -1)) /* * Convert page descriptor index to user virtual address @@ -207,7 +291,7 @@ typedef uint32_t pmap_paddr_t; /* * Convert address offset to page table index */ -#define ptenum(a) (((a) >> PTESHIFT) & PTEMASK) +#define ptenum(a) (((vm_offset_t)(a) >> PTESHIFT) & PTEMASK) /* * Hardware pte bit definitions (to be used directly on the ptes @@ -225,13 +309,22 @@ typedef uint32_t pmap_paddr_t; #define INTEL_PTE_PS 0x00000080 #define INTEL_PTE_GLOBAL 0x00000100 #define INTEL_PTE_WIRED 0x00000200 -#define INTEL_PTE_PFN /*0xFFFFF000*/ (~0xFFF) +#define INTEL_PTE_PFN PG_FRAME #define INTEL_PTE_PTA 0x00000080 +#define INTEL_PTE_NX (1ULL << 63) + +#define INTEL_PTE_INVALID 0 + #define pa_to_pte(a) ((a) & INTEL_PTE_PFN) /* XXX */ #define pte_to_pa(p) ((p) & INTEL_PTE_PFN) /* XXX */ #define pte_increment_pa(p) ((p) += INTEL_OFFMASK+1) +#define pte_kernel_rw(p) ((pt_entry_t)(pa_to_pte(p) | INTEL_PTE_VALID|INTEL_PTE_RW)) +#define pte_kernel_ro(p) ((pt_entry_t)(pa_to_pte(p) | INTEL_PTE_VALID)) +#define pte_user_rw(p) ((pt_entry_t)(pa_to_pte(p) | INTEL_PTE_VALID|INTEL_PTE_USER|INTEL_PTE_RW)) +#define pte_user_ro(p) 
((pt_entry_t)(pa_to_pte(p) | INTEL_PTE_VALID|INTEL_PTE_USER)) + #define PMAP_DEFAULT_CACHE 0 #define PMAP_INHIBIT_CACHE 1 #define PMAP_GUARDED_CACHE 2 @@ -252,9 +345,14 @@ extern pt_entry_t PTmap[], APTmap[], Upte; extern pd_entry_t PTD[], APTD[], PTDpde[], APTDpde[], Upde; extern pd_entry_t *IdlePTD; /* physical address of "Idle" state directory */ -#ifdef PAE extern pdpt_entry_t *IdlePDPT; -#endif + +extern pmap_paddr_t lo_kernel_cr3; + +extern pml4_entry_t *IdlePML4; +extern pdpt_entry_t *IdlePDPT64; +extern addr64_t kernel64_cr3; +extern boolean_t no_shared_cr3; /* * virtual address to page table entry and @@ -262,7 +360,7 @@ extern pdpt_entry_t *IdlePDPT; * Note: these work recursively, thus vtopte of a pte will give * the corresponding pde that in turn maps it. */ -#define vtopte(va) (PTmap + i386_btop(va)) +#define vtopte(va) (PTmap + i386_btop((vm_offset_t)va)) typedef volatile long cpu_set; /* set of CPUs - must be <= 32 */ @@ -281,48 +379,50 @@ struct md_page { */ struct pmap { -#ifdef PMAP_QUEUE - queue_head_t pmap_link; /* unordered queue of in use pmaps */ -#endif - pd_entry_t *dirbase; /* page directory pointer register */ - pd_entry_t *pdirbase; /* phys. address of dirbase */ - vm_object_t pm_obj; /* object to hold pte's */ + pd_entry_t *dirbase; /* page directory pointer */ + pmap_paddr_t pdirbase; /* phys. 
address of dirbase */ + vm_object_t pm_obj; /* object to hold pde's */ int ref_count; /* reference count */ + int nx_enabled; + boolean_t pm_64bit; + boolean_t pm_kernel_cr3; decl_simple_lock_data(,lock) /* lock on map */ struct pmap_statistics stats; /* map statistics */ - cpu_set cpus_using; /* bitmap of cpus using pmap */ -#ifdef PAE vm_offset_t pm_hold; /* true pdpt zalloc addr */ - pdpt_entry_t *pm_pdpt; /* KVA of pg dir ptr table */ - vm_offset_t pm_ppdpt; /* phy addr pdpt - should really be 32/64 bit */ -#endif + pmap_paddr_t pm_cr3; /* physical addr */ + pdpt_entry_t *pm_pdpt; /* KVA of 3rd level page */ + pml4_entry_t *pm_pml4; /* VKA of top level */ + vm_object_t pm_obj_pdpt; /* holds pdpt pages */ + vm_object_t pm_obj_pml4; /* holds pml4 pages */ + vm_object_t pm_obj_top; /* holds single top level page */ }; -#define PMAP_NWINDOWS 4 + +#define PMAP_PDPT_FIRST_WINDOW 0 +#define PMAP_PDPT_NWINDOWS 4 +#define PMAP_PDE_FIRST_WINDOW (PMAP_PDPT_NWINDOWS) +#define PMAP_PDE_NWINDOWS 4 +#define PMAP_PTE_FIRST_WINDOW (PMAP_PDE_FIRST_WINDOW + PMAP_PDE_NWINDOWS) +#define PMAP_PTE_NWINDOWS 4 + +#define PMAP_NWINDOWS_FIRSTFREE (PMAP_PTE_FIRST_WINDOW + PMAP_PTE_NWINDOWS) +#define PMAP_WINDOW_SIZE 8 +#define PMAP_NWINDOWS (PMAP_NWINDOWS_FIRSTFREE + PMAP_WINDOW_SIZE) + typedef struct { pt_entry_t *prv_CMAP; caddr_t prv_CADDR; } mapwindow_t; typedef struct cpu_pmap { + int pdpt_window_index; + int pde_window_index; + int pte_window_index; mapwindow_t mapwindow[PMAP_NWINDOWS]; - struct pmap *real_pmap; - struct pmap_update_list *update_list; - volatile boolean_t update_needed; } cpu_pmap_t; -/* - * Should be rewritten in asm anyway. 
- */ -#define CM1 (current_cpu_datap()->cpu_pmap->mapwindow[0].prv_CMAP) -#define CM2 (current_cpu_datap()->cpu_pmap->mapwindow[1].prv_CMAP) -#define CM3 (current_cpu_datap()->cpu_pmap->mapwindow[2].prv_CMAP) -#define CM4 (current_cpu_datap()->cpu_pmap->mapwindow[3].prv_CMAP) -#define CA1 (current_cpu_datap()->cpu_pmap->mapwindow[0].prv_CADDR) -#define CA2 (current_cpu_datap()->cpu_pmap->mapwindow[1].prv_CADDR) -#define CA3 (current_cpu_datap()->cpu_pmap->mapwindow[2].prv_CADDR) -#define CA4 (current_cpu_datap()->cpu_pmap->mapwindow[3].prv_CADDR) + +extern mapwindow_t *pmap_get_mapwindow(pt_entry_t pentry); typedef struct pmap_memory_regions { ppnum_t base; @@ -334,117 +434,62 @@ typedef struct pmap_memory_regions { unsigned pmap_memory_region_count; unsigned pmap_memory_region_current; -#define PMAP_MEMORY_REGIONS_SIZE 32 +#define PMAP_MEMORY_REGIONS_SIZE 128 extern pmap_memory_region_t pmap_memory_regions[]; -/* - * Optimization avoiding some TLB flushes when switching to - * kernel-loaded threads. This is effective only for i386: - * Since user task, kernel task and kernel loaded tasks share the - * same virtual space (with appropriate protections), any pmap - * allows mapping kernel and kernel loaded tasks. - * - * The idea is to avoid switching to another pmap unnecessarily when - * switching to a kernel-loaded task, or when switching to the kernel - * itself. - * - * We store the pmap we are really using (from which we fetched the - * dirbase value) in current_cpu_datap()->cpu_pmap.real_pmap. - * - * Invariant: - * current_pmap() == current_cpu_datap()->cpu_pmap.real_pmap || - * current_pmap() == kernel_pmap. - */ -#define PMAP_REAL(my_cpu) (cpu_datap(my_cpu)->cpu_pmap->real_pmap) - -#include -/* - * If switching to the kernel pmap, don't incur the TLB cost of switching - * to its page tables, since all maps include the kernel map as a subset. - * Simply record that this CPU is logically on the kernel pmap (see - * pmap_destroy). 
- * - * Similarly, if switching to a pmap (other than kernel_pmap that is already - * in use, don't do anything to the hardware, to avoid a TLB flush. - */ - -#define PMAP_CPU_SET(pmap, my_cpu) i_bit_set(my_cpu, &((pmap)->cpus_using)) -#define PMAP_CPU_CLR(pmap, my_cpu) i_bit_clear(my_cpu, &((pmap)->cpus_using)) - -#ifdef PAE -#define PDIRBASE pm_ppdpt -#else -#define PDIRBASE pdirbase -#endif -#define set_dirbase(mypmap, my_cpu) { \ - struct pmap **ppmap = &PMAP_REAL(my_cpu); \ - pmap_paddr_t pdirbase = (pmap_paddr_t)((mypmap)->PDIRBASE); \ - \ - if (*ppmap == (pmap_paddr_t)NULL) { \ - *ppmap = (mypmap); \ - PMAP_CPU_SET((mypmap), my_cpu); \ - set_cr3(pdirbase); \ - } else if ((mypmap) != kernel_pmap && (mypmap) != *ppmap ) { \ - if (*ppmap != kernel_pmap) \ - PMAP_CPU_CLR(*ppmap, my_cpu); \ - *ppmap = (mypmap); \ - PMAP_CPU_SET((mypmap), my_cpu); \ - set_cr3(pdirbase); \ - } \ - assert((mypmap) == *ppmap || (mypmap) == kernel_pmap); \ +static inline void set_dirbase(pmap_t tpmap, __unused int tcpu) { + current_cpu_datap()->cpu_task_cr3 = (pmap_paddr_t)((tpmap)->pm_cr3); + current_cpu_datap()->cpu_task_map = tpmap->pm_64bit ? TASK_MAP_64BIT : TASK_MAP_32BIT; } -/* - * List of cpus that are actively using mapped memory. Any - * pmap update operation must wait for all cpus in this list. - * Update operations must still be queued to cpus not in this - * list. - */ -extern cpu_set cpus_active; - -/* - * List of cpus that are idle, but still operating, and will want - * to see any kernel pmap updates when they become active. - */ -extern cpu_set cpus_idle; - - -#define cpu_update_needed(cpu) cpu_datap(cpu)->cpu_pmap->update_needed -#define cpu_update_list(cpu) cpu_datap(cpu)->cpu_pmap->update_list - /* * External declarations for PMAP_ACTIVATE. */ -extern void process_pmap_updates(struct pmap *pmap); +extern void process_pmap_updates(void); extern void pmap_update_interrupt(void); /* * Machine dependent routines that are used only for i386/i486/i860. 
*/ -extern vm_offset_t (kvtophys)( +extern addr64_t (kvtophys)( vm_offset_t addr); extern pt_entry_t *pmap_pte( struct pmap *pmap, - vm_offset_t addr); + vm_map_offset_t addr); + +extern pd_entry_t *pmap_pde( + struct pmap *pmap, + vm_map_offset_t addr); + +extern pd_entry_t *pmap64_pde( + struct pmap *pmap, + vm_map_offset_t addr); + +extern pdpt_entry_t *pmap64_pdpt( + struct pmap *pmap, + vm_map_offset_t addr); extern vm_offset_t pmap_map( vm_offset_t virt, - vm_offset_t start, - vm_offset_t end, - vm_prot_t prot); + vm_map_offset_t start, + vm_map_offset_t end, + vm_prot_t prot, + unsigned int flags); extern vm_offset_t pmap_map_bd( vm_offset_t virt, - vm_offset_t start, - vm_offset_t end, - vm_prot_t prot); + vm_map_offset_t start, + vm_map_offset_t end, + vm_prot_t prot, + unsigned int flags); extern void pmap_bootstrap( - vm_offset_t load_start); + vm_offset_t load_start, + boolean_t IA32e); extern boolean_t pmap_valid_page( ppnum_t pn); @@ -454,14 +499,28 @@ extern int pmap_list_resident_pages( vm_offset_t *listp, int space); -extern void pmap_commpage_init( +extern void pmap_commpage32_init( vm_offset_t kernel, vm_offset_t user, int count); +extern void pmap_commpage64_init( + vm_offset_t kernel, + vm_map_offset_t user, + int count); + extern struct cpu_pmap *pmap_cpu_alloc( boolean_t is_boot_cpu); extern void pmap_cpu_free( struct cpu_pmap *cp); + +extern void pmap_map_block( + pmap_t pmap, + addr64_t va, + ppnum_t pa, + uint32_t size, + vm_prot_t prot, + int attr, + unsigned int flags); extern void invalidate_icache(vm_offset_t addr, unsigned cnt, int phys); extern void flush_dcache(vm_offset_t addr, unsigned count, int phys); @@ -469,6 +528,24 @@ extern ppnum_t pmap_find_phys(pmap_t map, addr64_t va); extern void pmap_sync_page_data_phys(ppnum_t pa); extern void pmap_sync_page_attributes_phys(ppnum_t pa); +extern kern_return_t pmap_nest(pmap_t grand, pmap_t subord, addr64_t vstart, addr64_t nstart, uint64_t size); +extern kern_return_t 
pmap_unnest(pmap_t grand, addr64_t vaddr); +extern void pmap_map_sharedpage(task_t task, pmap_t pmap); +extern void pmap_unmap_sharedpage(pmap_t pmap); +extern void pmap_disable_NX(pmap_t pmap); +extern void pmap_set_4GB_pagezero(pmap_t pmap); +extern void pmap_clear_4GB_pagezero(pmap_t pmap); +extern void pmap_load_kernel_cr3(void); +extern vm_offset_t pmap_cpu_high_map_vaddr(int, enum high_cpu_types); +extern vm_offset_t pmap_high_map_vaddr(enum high_cpu_types); +extern vm_offset_t pmap_high_map(pt_entry_t, enum high_cpu_types); +extern vm_offset_t pmap_cpu_high_shared_remap(int, enum high_cpu_types, vm_offset_t, int); +extern vm_offset_t pmap_high_shared_remap(enum high_fixed_addresses, vm_offset_t, int); + +extern void pt_fake_zone_info(int *, vm_size_t *, vm_size_t *, vm_size_t *, vm_size_t *, int *, int *); + + + /* * Macros for speed. */ @@ -483,125 +560,109 @@ extern void pmap_sync_page_attributes_phys(ppnum_t pa); #undef PMAP_DEACTIVATE_USER #endif -/* - * For multiple CPUS, PMAP_ACTIVATE and PMAP_DEACTIVATE must manage - * fields to control TLB invalidation on other CPUS. - */ -#define PMAP_ACTIVATE_KERNEL(my_cpu) { \ - \ - /* \ - * Let pmap updates proceed while we wait for this pmap. \ - */ \ - i_bit_clear((my_cpu), &cpus_active); \ - \ - /* \ - * Lock the pmap to put this cpu in its active set. \ - * Wait for updates here. \ - */ \ - simple_lock(&kernel_pmap->lock); \ - \ - /* \ - * Process invalidate requests for the kernel pmap. \ - */ \ - if (cpu_update_needed(my_cpu)) \ - process_pmap_updates(kernel_pmap); \ - \ - /* \ - * Mark that this cpu is using the pmap. \ - */ \ - i_bit_set((my_cpu), &kernel_pmap->cpus_using); \ - \ - /* \ - * Mark this cpu active - IPL will be lowered by \ - * load_context(). 
\ - */ \ - i_bit_set((my_cpu), &cpus_active); \ - \ - simple_unlock(&kernel_pmap->lock); \ +#define PMAP_ACTIVATE_KERNEL(my_cpu) { \ + spl_t spl; \ + \ + spl = splhigh(); \ + if (current_cpu_datap()->cpu_tlb_invalid) \ + process_pmap_updates(); \ + splx(spl); \ } -#define PMAP_DEACTIVATE_KERNEL(my_cpu) { \ - /* \ - * Mark pmap no longer in use by this cpu even if \ - * pmap is locked against updates. \ - */ \ - i_bit_clear((my_cpu), &kernel_pmap->cpus_using); \ - i_bit_clear((my_cpu), &cpus_active); \ - PMAP_REAL(my_cpu) = NULL; \ +#define PMAP_DEACTIVATE_KERNEL(my_cpu) { \ + spl_t spl; \ + \ + spl = splhigh(); \ + process_pmap_updates(); \ + splx(spl); \ } + #define PMAP_ACTIVATE_MAP(map, my_cpu) { \ register pmap_t tpmap; \ - \ - tpmap = vm_map_pmap(map); \ - if (tpmap == kernel_pmap) { \ - /* \ - * If this is the kernel pmap, switch to its page tables. \ - */ \ - set_dirbase(kernel_pmap, my_cpu); \ - } \ - else { \ - /* \ - * Let pmap updates proceed while we wait for this pmap. \ - */ \ - i_bit_clear((my_cpu), &cpus_active); \ - \ - /* \ - * Lock the pmap to put this cpu in its active set. \ - * Wait for updates here. \ - */ \ - simple_lock(&tpmap->lock); \ - \ - /* \ - * No need to invalidate the TLB - the entire user pmap \ - * will be invalidated by reloading dirbase. \ - */ \ - set_dirbase(tpmap, my_cpu); \ - \ - /* \ - * Mark this cpu active - IPL will be lowered by \ - * load_context(). 
\ - */ \ - i_bit_set((my_cpu), &cpus_active); \ - \ - simple_unlock(&tpmap->lock); \ - } \ + \ + tpmap = vm_map_pmap(map); \ + set_dirbase(tpmap, my_cpu); \ } #define PMAP_DEACTIVATE_MAP(map, my_cpu) -#define PMAP_ACTIVATE_USER(th, my_cpu) { \ - spl_t spl; \ - \ - spl = splhigh(); \ - PMAP_ACTIVATE_MAP(th->map, my_cpu) \ - splx(spl); \ +#define PMAP_ACTIVATE_USER(th, my_cpu) { \ + spl_t spl; \ + \ + spl = splhigh(); \ + PMAP_ACTIVATE_MAP(th->map, my_cpu) \ + splx(spl); \ } #define PMAP_DEACTIVATE_USER(th, my_cpu) + #define PMAP_SWITCH_CONTEXT(old_th, new_th, my_cpu) { \ spl_t spl; \ - \ + pt_entry_t *kpdp; \ + pt_entry_t *updp; \ + int i; \ + int need_flush; \ + \ + need_flush = 0; \ + spl = splhigh(); \ if (old_th->map != new_th->map) { \ - spl = splhigh(); \ PMAP_DEACTIVATE_MAP(old_th->map, my_cpu); \ PMAP_ACTIVATE_MAP(new_th->map, my_cpu); \ - splx(spl); \ } \ + kpdp = current_cpu_datap()->cpu_copywindow_pdp; \ + for (i = 0; i < NCOPY_WINDOWS; i++) { \ + if (new_th->machine.copy_window[i].user_base != (user_addr_t)-1) { \ + updp = pmap_pde(new_th->map->pmap, \ + new_th->machine.copy_window[i].user_base);\ + *kpdp = updp ? *updp : 0; \ + } \ + kpdp++; \ + } \ + splx(spl); \ + if (new_th->machine.copyio_state == WINDOWS_OPENED) \ + need_flush = 1; \ + else \ + new_th->machine.copyio_state = WINDOWS_DIRTY; \ + if (new_th->machine.physwindow_pte) { \ + *(current_cpu_datap()->cpu_physwindow_ptep) = \ + new_th->machine.physwindow_pte; \ + if (need_flush == 0) \ + invlpg((uintptr_t)current_cpu_datap()->cpu_physwindow_base);\ + } \ + if (need_flush) \ + flush_tlb(); \ } #define PMAP_SWITCH_USER(th, new_map, my_cpu) { \ spl_t spl; \ \ - spl = splhigh(); \ + spl = splhigh(); \ PMAP_DEACTIVATE_MAP(th->map, my_cpu); \ th->map = new_map; \ PMAP_ACTIVATE_MAP(th->map, my_cpu); \ splx(spl); \ + inval_copy_windows(th); \ } +/* + * Marking the current cpu's cr3 inactive is achieved by setting its lsb. + * Marking the current cpu's cr3 active once more involves clearing this bit. 
+ * Note that valid page tables are page-aligned and so the bottom 12 bits + * are normally zero. + * We can only mark the current cpu active/inactive but we can test any cpu. + */ +#define CPU_CR3_MARK_INACTIVE() \ + current_cpu_datap()->cpu_active_cr3 |= 1 + +#define CPU_CR3_MARK_ACTIVE() \ + current_cpu_datap()->cpu_active_cr3 &= ~1 + +#define CPU_CR3_IS_ACTIVE(cpu) \ + ((cpu_datap(cpu)->cpu_active_cr3 & 1) == 0) + #define MARK_CPU_IDLE(my_cpu) { \ /* \ * Mark this cpu idle, and remove it from the active set, \ @@ -611,13 +672,16 @@ extern void pmap_sync_page_attributes_phys(ppnum_t pa); * becomes active. \ */ \ int s = splhigh(); \ - i_bit_set((my_cpu), &cpus_idle); \ - i_bit_clear((my_cpu), &cpus_active); \ + if (!cpu_mode_is64bit() || no_shared_cr3) \ + process_pmap_updates(); \ + else \ + pmap_load_kernel_cr3(); \ + CPU_CR3_MARK_INACTIVE(); \ + __asm__ volatile("mfence"); \ splx(s); \ - set_led(my_cpu); \ } -#define MARK_CPU_ACTIVE(my_cpu) { \ +#define MARK_CPU_ACTIVE(my_cpu) { \ \ int s = splhigh(); \ /* \ @@ -630,23 +694,20 @@ extern void pmap_sync_page_attributes_phys(ppnum_t pa); * set assures that we will receive another update \ * interrupt if this happens. \ */ \ - i_bit_clear((my_cpu), &cpus_idle); \ - \ - if (cpu_update_needed(my_cpu)) \ - pmap_update_interrupt(); \ + CPU_CR3_MARK_ACTIVE(); \ + __asm__ volatile("mfence"); \ \ - /* \ - * Mark that this cpu is now active. 
\ - */ \ - i_bit_set((my_cpu), &cpus_active); \ + if (current_cpu_datap()->cpu_tlb_invalid) \ + process_pmap_updates(); \ splx(s); \ - clear_led(my_cpu); \ } #define PMAP_CONTEXT(pmap, thread) #define pmap_kernel_va(VA) \ - (((VA) >= VM_MIN_KERNEL_ADDRESS) && ((VA) <= VM_MAX_KERNEL_ADDRESS)) + ((((vm_offset_t) (VA)) >= vm_min_kernel_address) && \ + (((vm_offset_t) (VA)) <= vm_max_kernel_address)) + #define pmap_resident_count(pmap) ((pmap)->stats.resident_count) #define pmap_copy(dst_pmap,src_pmap,dst_addr,len,src_addr) @@ -657,4 +718,8 @@ extern void pmap_sync_page_attributes_phys(ppnum_t pa); #endif /* ASSEMBLER */ + #endif /* _PMAP_MACHINE_ */ + + +#endif /* KERNEL_PRIVATE */ diff --git a/osfmk/i386/postcode.h b/osfmk/i386/postcode.h index d0065f7a6..ed74d9dc2 100644 --- a/osfmk/i386/postcode.h +++ b/osfmk/i386/postcode.h @@ -33,7 +33,7 @@ /* The POSTCODE is port 0x80 */ #define POSTPORT 0x80 -#define SPINCOUNT 100000000 +#define SPINCOUNT 300000000 #define CPU_PAUSE() rep; nop #if DEBUG @@ -41,6 +41,7 @@ * Macro to output byte value to postcode, destoying register al. * Additionally, if POSTCODE_DELAY, spin for about a second. 
*/ + #if POSTCODE_DELAY #define POSTCODE_AL \ outb %al,$(POSTPORT); \ @@ -96,37 +97,50 @@ * The following postcodes are defined for stages of early startup: */ -#define PSTART_ENTRY 0xFF -#define PSTART_PAGE_TABLES 0xFE -#define PSTART_BEFORE_PAGING 0xFD -#define VSTART_ENTRY 0xFC -#define VSTART_STACK_SWITCH 0xFB -#define VSTART_EXIT 0xFA -#define I386_INIT_ENTRY 0xF9 -#define CPU_INIT_D 0xF8 -#define PROCESSOR_BOOTSTRAP_D 0xF7 -#define PE_INIT_PLATFORM_D 0xF6 -#define THREAD_BOOTSTRAP_D 0xF5 - -#define SLAVE_PSTART_ENTRY 0xEF -#define REAL_TO_PROT_ENTRY 0xEE -#define REAL_TO_PROT_EXIT 0xED -#define STARTPROG_ENTRY 0xEC -#define STARTPROG_EXIT 0xEB -#define SLAVE_START_ENTRY 0xEA -#define SLAVE_START_EXIT 0xE9 -#define SVSTART_ENTRY 0xE8 -#define SVSTART_DESC_INIT 0xE7 -#define SVSTART_STACK_SWITCH 0xE6 -#define SVSTART_EXIT 0xE5 -#define I386_INIT_SLAVE 0xE4 - -#define MP_KDP_ENTER 0xDB /* Machine in kdp DeBugger */ -#define PANIC_HLT 0xD1 /* Die an early death */ - -#define ACPI_WAKE_START_ENTRY 0xCF -#define ACPI_WAKE_PROT_ENTRY 0xCE -#define ACPI_WAKE_PAGED_ENTRY 0xCD +#define _PSTART_ENTRY 0xFF +#define _PSTART_RELOC 0xFE +#define PSTART_ENTRY 0xFD +#define PSTART_PAGE_TABLES 0xFC +#define PSTART_BEFORE_PAGING 0xFB +#define VSTART_ENTRY 0xFA +#define VSTART_STACK_SWITCH 0xF9 +#define VSTART_EXIT 0xF8 +#define I386_INIT_ENTRY 0xF7 +#define CPU_INIT_D 0xF6 +#define PE_INIT_PLATFORM_D 0xF5 + +#define SLAVE_RSTART_ENTRY 0xEF +#define SLAVE_REAL_TO_PROT_ENTRY 0xEE +#define SLAVE_REAL_TO_PROT_EXIT 0xED +#define SLAVE_STARTPROG_ENTRY 0xEC +#define SLAVE_STARTPROG_EXIT 0xEB +#define SLAVE_PSTART_ENTRY 0xEA +#define SLAVE_PSTART_EXIT 0xE9 +#define SLAVE_VSTART_ENTRY 0xE8 +#define SLAVE_VSTART_DESC_INIT 0xE7 +#define SLAVE_VSTART_STACK_SWITCH 0xE6 +#define SLAVE_VSTART_EXIT 0xE5 +#define I386_INIT_SLAVE 0xE4 + +#define PANIC_DOUBLE_FAULT 0xDF /* Double Fault exception */ +#define PANIC_MACHINE_CHECK 0xDE /* Machine-Check */ +#define MP_KDP_ENTER 0xDB /* Machine in kdp 
DeBugger */ +#define PANIC_HLT 0xD1 /* Die an early death */ +#define NO_64BIT 0x64 /* No 64-bit support yet */ + +#define ACPI_WAKE_START_ENTRY 0xCF +#define ACPI_WAKE_PROT_ENTRY 0xCE +#define ACPI_WAKE_PAGED_ENTRY 0xCD + +#define CPU_IA32_ENABLE_ENTRY 0xBF +#define CPU_IA32_ENABLE_EXIT 0xBE +#define ML_LOAD_DESC64_ENTRY 0xBD +#define ML_LOAD_DESC64_GDT 0xBC +#define ML_LOAD_DESC64_IDT 0xBB +#define ML_LOAD_DESC64_LDT 0xBA +#define ML_LOAD_DESC64_EXIT 0xB9 +#define CPU_IA32_DISABLE_ENTRY 0xB8 +#define CPU_IA32_DISABLE_EXIT 0xB7 #ifndef ASSEMBLER inline static void diff --git a/osfmk/i386/proc_reg.h b/osfmk/i386/proc_reg.h index 153b6bdbd..27ca1e7ed 100644 --- a/osfmk/i386/proc_reg.h +++ b/osfmk/i386/proc_reg.h @@ -189,9 +189,17 @@ static inline void set_cr3(unsigned int value) __asm__ volatile("mov %0, %%cr3" : : "r" (value)); } -/* Implemented in locore: */ -extern uint32_t get_cr4(void); -extern void set_cr4(uint32_t); +static inline uint32_t get_cr4(void) +{ + uint32_t cr4; + __asm__ volatile("mov %%cr4, %0" : "=r" (cr4)); + return(cr4); +} + +static inline void set_cr4(uint32_t value) +{ + __asm__ volatile("mov %0, %%cr4" : : "r" (value)); +} static inline void clear_ts(void) { @@ -210,23 +218,30 @@ static inline void set_tr(unsigned int seg) __asm__ volatile("ltr %0" : : "rm" ((unsigned short)(seg))); } -static inline unsigned short get_ldt(void) +static inline unsigned short sldt(void) { unsigned short seg; __asm__ volatile("sldt %0" : "=rm" (seg)); return(seg); } -static inline void set_ldt(unsigned int seg) +static inline void lldt(unsigned int seg) { __asm__ volatile("lldt %0" : : "rm" ((unsigned short)(seg))); } +#ifdef MACH_KERNEL_PRIVATE +extern void flush_tlb64(void); static inline void flush_tlb(void) { unsigned long cr3_temp; + if (cpu_mode_is64bit()) { + flush_tlb64(); + return; + } __asm__ volatile("movl %%cr3, %0; movl %0, %%cr3" : "=r" (cr3_temp) :: "memory"); } +#endif /* MACH_KERNEL_PRIVATE */ static inline void wbinvd(void) { @@ -304,8 
+319,14 @@ __END_DECLS #define MSR_IA32_PERFCTR0 0xc1 #define MSR_IA32_PERFCTR1 0xc2 +#define MSR_PMG_CST_CONFIG_CONTROL 0xe2 + #define MSR_IA32_BBL_CR_CTL 0x119 +#define MSR_IA32_SYSENTER_CS 0x174 +#define MSR_IA32_SYSENTER_ESP 0x175 +#define MSR_IA32_SYSENTER_EIP 0x176 + #define MSR_IA32_MCG_CAP 0x179 #define MSR_IA32_MCG_STATUS 0x17a #define MSR_IA32_MCG_CTL 0x17b @@ -313,6 +334,9 @@ __END_DECLS #define MSR_IA32_EVNTSEL0 0x186 #define MSR_IA32_EVNTSEL1 0x187 +#define MSR_IA32_PERF_STS 0x198 +#define MSR_IA32_PERF_CTL 0x199 + #define MSR_IA32_MISC_ENABLE 0x1a0 #define MSR_IA32_DEBUGCTLMSR 0x1d9 @@ -344,4 +368,20 @@ __END_DECLS #define MSR_IA32_MTRR_FIX4K_F0000 0x26e #define MSR_IA32_MTRR_FIX4K_F8000 0x26f + +#define MSR_IA32_EFER 0xC0000080 +#define MSR_IA32_EFER_SCE 0x00000001 +#define MSR_IA32_EFER_LME 0x00000100 +#define MSR_IA32_EFER_LMA 0x00000400 +#define MSR_IA32_EFER_NXE 0x00000800 + +#define MSR_IA32_STAR 0xC0000081 +#define MSR_IA32_LSTAR 0xC0000082 +#define MSR_IA32_CSTAR 0xC0000083 +#define MSR_IA32_FMASK 0xC0000084 + +#define MSR_IA32_FS_BASE 0xC0000100 +#define MSR_IA32_GS_BASE 0xC0000101 +#define MSR_IA32_KERNEL_GS_BASE 0xC0000102 + #endif /* _I386_PROC_REG_H_ */ diff --git a/osfmk/i386/rtclock.c b/osfmk/i386/rtclock.c index d6368afa6..8193417b6 100644 --- a/osfmk/i386/rtclock.c +++ b/osfmk/i386/rtclock.c @@ -51,7 +51,7 @@ #include /* for kernel_map */ #include #include -#include +#include #include #include #include @@ -61,11 +61,12 @@ #include #include #include -#include #include #include #include #include +#include +#include #define MAX(a,b) (((a)>(b))?(a):(b)) #define MIN(a,b) (((a)>(b))?(b):(a)) @@ -74,429 +75,62 @@ #define UI_CPUFREQ_ROUNDING_FACTOR 10000000 -int sysclk_config(void); +int rtclock_config(void); -int sysclk_init(void); +int rtclock_init(void); -kern_return_t sysclk_gettime( - mach_timespec_t *cur_time); +uint64_t rtc_decrementer_min; -kern_return_t sysclk_getattr( - clock_flavor_t flavor, - clock_attr_t attr, - 
mach_msg_type_number_t *count); +void rtclock_intr(x86_saved_state_t *regs); +static uint64_t maxDec; /* longest interval our hardware timer can handle (nsec) */ -void sysclk_setalarm( - mach_timespec_t *alarm_time); +/* XXX this should really be in a header somewhere */ +extern clock_timer_func_t rtclock_timer_expire; -/* - * Lists of clock routines. - */ -struct clock_ops sysclk_ops = { - sysclk_config, sysclk_init, - sysclk_gettime, 0, - sysclk_getattr, 0, - sysclk_setalarm, -}; - -int calend_config(void); - -int calend_init(void); - -kern_return_t calend_gettime( - mach_timespec_t *cur_time); - -kern_return_t calend_getattr( - clock_flavor_t flavor, - clock_attr_t attr, - mach_msg_type_number_t *count); - -struct clock_ops calend_ops = { - calend_config, calend_init, - calend_gettime, 0, - calend_getattr, 0, - 0, -}; - -/* local data declarations */ - -static clock_timer_func_t rtclock_timer_expire; - -static timer_call_data_t rtclock_alarm_timer; - -static void rtclock_alarm_expire( - timer_call_param_t p0, - timer_call_param_t p1); - -struct { - mach_timespec_t calend_offset; - boolean_t calend_is_set; - - int64_t calend_adjtotal; - int32_t calend_adjdelta; - - uint32_t boottime; - - mach_timebase_info_data_t timebase_const; - - decl_simple_lock_data(,lock) /* real-time clock device lock */ -} rtclock; - -boolean_t rtc_initialized = FALSE; -clock_res_t rtc_intr_nsec = NSEC_PER_HZ; /* interrupt res */ -uint64_t rtc_cycle_count; /* clocks in 1/20th second */ -uint64_t rtc_cyc_per_sec; /* processor cycles per sec */ -uint32_t rtc_boot_frequency; /* provided by 1st speed-step */ -uint32_t rtc_quant_scale; /* clock to nanos multiplier */ -uint32_t rtc_quant_shift; /* clock to nanos right shift */ -uint64_t rtc_decrementer_min; - -static mach_timebase_info_data_t rtc_lapic_scale; /* nsec to lapic count */ - -/* - * Macros to lock/unlock real-time clock data. 
- */ -#define RTC_INTRS_OFF(s) \ - (s) = splclock() - -#define RTC_INTRS_ON(s) \ - splx(s) - -#define RTC_LOCK(s) \ -MACRO_BEGIN \ - RTC_INTRS_OFF(s); \ - simple_lock(&rtclock.lock); \ -MACRO_END - -#define RTC_UNLOCK(s) \ -MACRO_BEGIN \ - simple_unlock(&rtclock.lock); \ - RTC_INTRS_ON(s); \ -MACRO_END +static void rtc_set_timescale(uint64_t cycles); +static uint64_t rtc_export_speed(uint64_t cycles); -/* - * i8254 control. ** MONUMENT ** - * - * The i8254 is a traditional PC device with some arbitrary characteristics. - * Basically, it is a register that counts at a fixed rate and can be - * programmed to generate an interrupt every N counts. The count rate is - * clknum counts per sec (see pit.h), historically 1193167=14.318MHz/12 - * but the more accurate value is 1193182=14.31818MHz/12. [14.31818 MHz being - * the master crystal oscillator reference frequency since the very first PC.] - * Various constants are computed based on this value, and we calculate - * them at init time for execution efficiency. To obtain sufficient - * accuracy, some of the calculation are most easily done in floating - * point and then converted to int. - * - */ +extern void rtc_nanotime_store( + uint64_t tsc, + uint64_t nsec, + uint32_t scale, + uint32_t shift, + rtc_nanotime_t *dst); -/* - * Forward decl. - */ +extern void rtc_nanotime_load( + rtc_nanotime_t *src, + rtc_nanotime_t *dst); -static uint64_t rtc_set_cyc_per_sec(uint64_t cycles); -uint64_t rtc_nanotime_read(void); +rtc_nanotime_t rtc_nanotime_info; /* - * create_mul_quant_GHZ - * create a constant used to multiply the TSC by to convert to nanoseconds. - * This is a 32 bit number and the TSC *MUST* have a frequency higher than - * 1000Mhz for this routine to work. + * tsc_to_nanoseconds: * - * The theory here is that we know how many TSCs-per-sec the processor runs at. - * Normally to convert this to nanoseconds you would multiply the current - * timestamp by 1000000000 (a billion) then divide by TSCs-per-sec. 
- * Unfortunatly the TSC is 64 bits which would leave us with 96 bit intermediate - * results from the multiply that must be divided by. - * Usually thats - * uint96 = tsc * numer - * nanos = uint96 / denom - * Instead, we create this quant constant and it becomes the numerator, - * the denominator can then be 0x100000000 which makes our division as simple as - * forgetting the lower 32 bits of the result. We can also pass this number to - * user space as the numer and pass 0xFFFFFFFF (RTC_FAST_DENOM) as the denom to - * convert raw counts * to nanos. The difference is so small as to be - * undetectable by anything. - * - * Unfortunatly we can not do this for sub GHZ processors. In this case, all - * we do is pass the CPU speed in raw as the denom and we pass in 1000000000 - * as the numerator. No short cuts allowed - */ -#define RTC_FAST_DENOM 0xFFFFFFFF -inline static uint32_t -create_mul_quant_GHZ(int shift, uint32_t quant) -{ - return (uint32_t)((((uint64_t)NSEC_PER_SEC/20) << shift) / quant); -} -/* - * This routine takes a value of raw TSC ticks and applies the passed mul_quant - * generated by create_mul_quant() This is our internal routine for creating - * nanoseconds. - * Since we don't really have uint96_t this routine basically does this.... - * uint96_t intermediate = (*value) * scale - * return (intermediate >> 32) - */ -inline static uint64_t -fast_get_nano_from_abs(uint64_t value, int scale) -{ - asm (" movl %%edx,%%esi \n\t" - " mull %%ecx \n\t" - " movl %%edx,%%edi \n\t" - " movl %%esi,%%eax \n\t" - " mull %%ecx \n\t" - " xorl %%ecx,%%ecx \n\t" - " addl %%edi,%%eax \n\t" - " adcl %%ecx,%%edx " - : "+A" (value) - : "c" (scale) - : "%esi", "%edi"); - return value; -} - -/* - * This routine basically does this... 
- * ts.tv_sec = nanos / 1000000000; create seconds - * ts.tv_nsec = nanos % 1000000000; create remainder nanos - */ -inline static mach_timespec_t -nanos_to_timespec(uint64_t nanos) -{ - union { - mach_timespec_t ts; - uint64_t u64; - } ret; - ret.u64 = nanos; - asm volatile("divl %1" : "+A" (ret.u64) : "r" (NSEC_PER_SEC)); - return ret.ts; -} - -/* - * The following two routines perform the 96 bit arithmetic we need to - * convert generic absolute<->nanoseconds - * The multiply routine takes a uint64_t and a uint32_t and returns the result - * in a uint32_t[3] array. - * The divide routine takes this uint32_t[3] array and divides it by a uint32_t - * returning a uint64_t + * Basic routine to convert a raw 64 bit TSC value to a + * 64 bit nanosecond value. The conversion is implemented + * based on the scale factor and an implicit 32 bit shift. */ -inline static void -longmul(uint64_t *abstime, uint32_t multiplicand, uint32_t *result) -{ - asm volatile( - " pushl %%ebx \n\t" - " movl %%eax,%%ebx \n\t" - " movl (%%eax),%%eax \n\t" - " mull %%ecx \n\t" - " xchg %%eax,%%ebx \n\t" - " pushl %%edx \n\t" - " movl 4(%%eax),%%eax \n\t" - " mull %%ecx \n\t" - " movl %2,%%ecx \n\t" - " movl %%ebx,(%%ecx) \n\t" - " popl %%ebx \n\t" - " addl %%ebx,%%eax \n\t" - " popl %%ebx \n\t" - " movl %%eax,4(%%ecx) \n\t" - " adcl $0,%%edx \n\t" - " movl %%edx,8(%%ecx) // and save it" - : : "a"(abstime), "c"(multiplicand), "m"(result)); - -} - -inline static uint64_t -longdiv(uint32_t *numer, uint32_t denom) -{ - uint64_t result; - asm volatile( - " pushl %%ebx \n\t" - " movl %%eax,%%ebx \n\t" - " movl 8(%%eax),%%edx \n\t" - " movl 4(%%eax),%%eax \n\t" - " divl %%ecx \n\t" - " xchg %%ebx,%%eax \n\t" - " movl (%%eax),%%eax \n\t" - " divl %%ecx \n\t" - " xchg %%ebx,%%edx \n\t" - " popl %%ebx \n\t" - : "=A"(result) : "a"(numer),"c"(denom)); - return result; -} - -/* - * Enable or disable timer 2. - * Port 0x61 controls timer 2: - * bit 0 gates the clock, - * bit 1 gates output to speaker. 
- */ -inline static void -enable_PIT2(void) -{ - asm volatile( - " inb $0x61,%%al \n\t" - " and $0xFC,%%al \n\t" - " or $1,%%al \n\t" - " outb %%al,$0x61 \n\t" - : : : "%al" ); -} - -inline static void -disable_PIT2(void) -{ - asm volatile( - " inb $0x61,%%al \n\t" - " and $0xFC,%%al \n\t" - " outb %%al,$0x61 \n\t" - : : : "%al" ); -} - -inline static void -set_PIT2(int value) -{ -/* - * First, tell the clock we are going to write 16 bits to the counter - * and enable one-shot mode (command 0xB8 to port 0x43) - * Then write the two bytes into the PIT2 clock register (port 0x42). - * Loop until the value is "realized" in the clock, - * this happens on the next tick. - */ - asm volatile( - " movb $0xB8,%%al \n\t" - " outb %%al,$0x43 \n\t" - " movb %%dl,%%al \n\t" - " outb %%al,$0x42 \n\t" - " movb %%dh,%%al \n\t" - " outb %%al,$0x42 \n" -"1: inb $0x42,%%al \n\t" - " inb $0x42,%%al \n\t" - " cmp %%al,%%dh \n\t" - " jne 1b" - : : "d"(value) : "%al"); -} - -inline static uint64_t -get_PIT2(unsigned int *value) -{ - register uint64_t result; -/* - * This routine first latches the time (command 0x80 to port 0x43), - * then gets the time stamp so we know how long the read will take later. - * Read (from port 0x42) and return the current value of the timer. - */ - asm volatile( - " xorl %%ecx,%%ecx \n\t" - " movb $0x80,%%al \n\t" - " outb %%al,$0x43 \n\t" - " rdtsc \n\t" - " pushl %%eax \n\t" - " inb $0x42,%%al \n\t" - " movb %%al,%%cl \n\t" - " inb $0x42,%%al \n\t" - " movb %%al,%%ch \n\t" - " popl %%eax " - : "=A"(result), "=c"(*value)); - return result; -} - -/* - * timeRDTSC() - * This routine sets up PIT counter 2 to count down 1/20 of a second. - * It pauses until the value is latched in the counter - * and then reads the time stamp counter to return to the caller. 
- */ -static uint64_t -timeRDTSC(void) +static inline uint64_t +_tsc_to_nanoseconds(uint64_t value) { - int attempts = 0; - uint64_t latchTime; - uint64_t saveTime,intermediate; - unsigned int timerValue, lastValue; - boolean_t int_enabled; - /* - * Table of correction factors to account for - * - timer counter quantization errors, and - * - undercounts 0..5 - */ -#define SAMPLE_CLKS_EXACT (((double) CLKNUM) / 20.0) -#define SAMPLE_CLKS_INT ((int) CLKNUM / 20) -#define SAMPLE_NSECS (2000000000LL) -#define SAMPLE_MULTIPLIER (((double)SAMPLE_NSECS)*SAMPLE_CLKS_EXACT) -#define ROUND64(x) ((uint64_t)((x) + 0.5)) - uint64_t scale[6] = { - ROUND64(SAMPLE_MULTIPLIER/(double)(SAMPLE_CLKS_INT-0)), - ROUND64(SAMPLE_MULTIPLIER/(double)(SAMPLE_CLKS_INT-1)), - ROUND64(SAMPLE_MULTIPLIER/(double)(SAMPLE_CLKS_INT-2)), - ROUND64(SAMPLE_MULTIPLIER/(double)(SAMPLE_CLKS_INT-3)), - ROUND64(SAMPLE_MULTIPLIER/(double)(SAMPLE_CLKS_INT-4)), - ROUND64(SAMPLE_MULTIPLIER/(double)(SAMPLE_CLKS_INT-5)) - }; - - int_enabled = ml_set_interrupts_enabled(FALSE); - -restart: - if (attempts >= 2) - panic("timeRDTSC() calibation failed with %d attempts\n", attempts); - attempts++; - enable_PIT2(); // turn on PIT2 - set_PIT2(0); // reset timer 2 to be zero - latchTime = rdtsc64(); // get the time stamp to time - latchTime = get_PIT2(&timerValue) - latchTime; // time how long this takes - set_PIT2(SAMPLE_CLKS_INT); // set up the timer for (almost) 1/20th a second - saveTime = rdtsc64(); // now time how long a 20th a second is... - get_PIT2(&lastValue); - get_PIT2(&lastValue); // read twice, first value may be unreliable - do { - intermediate = get_PIT2(&timerValue); - if (timerValue > lastValue) { - printf("Hey we are going backwards! 
%u -> %u, restarting timing\n", - timerValue,lastValue); - set_PIT2(0); - disable_PIT2(); - goto restart; - } - lastValue = timerValue; - } while (timerValue > 5); - kprintf("timerValue %d\n",timerValue); - kprintf("intermediate 0x%016llx\n",intermediate); - kprintf("saveTime 0x%016llx\n",saveTime); - - intermediate -= saveTime; // raw count for about 1/20 second - intermediate *= scale[timerValue]; // rescale measured time spent - intermediate /= SAMPLE_NSECS; // so its exactly 1/20 a second - intermediate += latchTime; // add on our save fudge - - set_PIT2(0); // reset timer 2 to be zero - disable_PIT2(); // turn off PIT 2 - - ml_set_interrupts_enabled(int_enabled); - return intermediate; -} + asm volatile("movl %%edx,%%esi ;" + "mull %%ecx ;" + "movl %%edx,%%edi ;" + "movl %%esi,%%eax ;" + "mull %%ecx ;" + "addl %%edi,%%eax ;" + "adcl $0,%%edx " + : "+A" (value) : "c" (rtc_nanotime_info.scale) : "esi", "edi"); -static uint64_t -tsc_to_nanoseconds(uint64_t abstime) -{ - uint32_t numer; - uint32_t denom; - uint32_t intermediate[3]; - - numer = rtclock.timebase_const.numer; - denom = rtclock.timebase_const.denom; - if (denom == RTC_FAST_DENOM) { - abstime = fast_get_nano_from_abs(abstime, numer); - } else { - longmul(&abstime, numer, intermediate); - abstime = longdiv(intermediate, denom); - } - return abstime; + return (value); } -inline static mach_timespec_t -tsc_to_timespec(void) +uint64_t +tsc_to_nanoseconds(uint64_t value) { - uint64_t currNanos; - currNanos = rtc_nanotime_read(); - return nanos_to_timespec(currNanos); + return _tsc_to_nanoseconds(value); } -#define DECREMENTER_MAX UINT_MAX static uint32_t deadline_to_decrementer( uint64_t deadline, @@ -508,86 +142,28 @@ deadline_to_decrementer( return rtc_decrementer_min; else { delta = deadline - now; - return MIN(MAX(rtc_decrementer_min,delta),DECREMENTER_MAX); + return MIN(MAX(rtc_decrementer_min,delta),maxDec); } } -static inline uint64_t -lapic_time_countdown(uint32_t initial_count) -{ - boolean_t 
state; - uint64_t start_time; - uint64_t stop_time; - lapic_timer_count_t count; - - state = ml_set_interrupts_enabled(FALSE); - lapic_set_timer(FALSE, one_shot, divide_by_1, initial_count); - start_time = rdtsc64(); - do { - lapic_get_timer(NULL, NULL, NULL, &count); - } while (count > 0); - stop_time = rdtsc64(); - ml_set_interrupts_enabled(state); - - return tsc_to_nanoseconds(stop_time - start_time); -} - -static void -rtc_lapic_timer_calibrate(void) -{ - uint32_t nsecs; - uint64_t countdown; - - if (!(cpuid_features() & CPUID_FEATURE_APIC)) - return; - - /* - * Set the local apic timer counting down to zero without an interrupt. - * Use the timestamp to calculate how long this takes. - */ - nsecs = (uint32_t) lapic_time_countdown(rtc_intr_nsec); - - /* - * Compute a countdown ratio for a given time in nanoseconds. - * That is, countdown = time * numer / denom. - */ - countdown = (uint64_t)rtc_intr_nsec * (uint64_t)rtc_intr_nsec / nsecs; - - nsecs = (uint32_t) lapic_time_countdown((uint32_t) countdown); - - rtc_lapic_scale.numer = countdown; - rtc_lapic_scale.denom = nsecs; - - kprintf("rtc_lapic_timer_calibrate() scale: %d/%d\n", - (uint32_t) countdown, nsecs); -} - -static void -rtc_lapic_set_timer( - uint32_t interval) -{ - uint64_t count; - - assert(rtc_lapic_scale.denom); - - count = interval * (uint64_t) rtc_lapic_scale.numer; - count /= rtc_lapic_scale.denom; - - lapic_set_timer(TRUE, one_shot, divide_by_1, (uint32_t) count); -} - static void rtc_lapic_start_ticking(void) { uint64_t abstime; uint64_t first_tick; - uint64_t decr; + cpu_data_t *cdp = current_cpu_datap(); abstime = mach_absolute_time(); - first_tick = abstime + NSEC_PER_HZ; - current_cpu_datap()->cpu_rtc_tick_deadline = first_tick; - decr = deadline_to_decrementer(first_tick, abstime); - rtc_lapic_set_timer(decr); + rtclock_tick_interval = NSEC_PER_HZ; + + first_tick = abstime + rtclock_tick_interval; + cdp->rtclock_intr_deadline = first_tick; + + /* + * Force a complete re-evaluation of 
timer deadlines. + */ + cdp->rtcPop = EndOfAllTime; + etimer_resync_deadlines(); } /* @@ -596,20 +172,9 @@ rtc_lapic_start_ticking(void) */ int -sysclk_config(void) +rtclock_config(void) { - - mp_disable_preemption(); - if (cpu_number() != master_cpu) { - mp_enable_preemption(); - return(1); - } - mp_enable_preemption(); - - timer_call_setup(&rtclock_alarm_timer, rtclock_alarm_expire, NULL); - - simple_lock_init(&rtclock.lock, 0); - + /* nothing to do */ return (1); } @@ -617,235 +182,169 @@ sysclk_config(void) /* * Nanotime/mach_absolutime_time * ----------------------------- - * The timestamp counter (tsc) - which counts cpu clock cycles and can be read - * efficient by the kernel and in userspace - is the reference for all timing. - * However, the cpu clock rate is not only platform-dependent but can change - * (speed-step) dynamically. Hence tsc is converted into nanoseconds which is - * identical to mach_absolute_time. The conversion to tsc to nanoseconds is - * encapsulated by nanotime. + * The timestamp counter (TSC) - which counts cpu clock cycles and can be read + * efficiently by the kernel and in userspace - is the reference for all timing. + * The cpu clock rate is platform-dependent and may stop or be reset when the + * processor is napped/slept. As a result, nanotime is the software abstraction + * used to maintain a monotonic clock, adjusted from an outside reference as needed. * * The kernel maintains nanotime information recording: - * - the current ratio of tsc to nanoseconds + * - the ratio of tsc to nanoseconds * with this ratio expressed as a 32-bit scale and shift * (power of 2 divider); - * - the tsc (step_tsc) and nanotime (step_ns) at which the current - * ratio (clock speed) began. - * So a tsc value can be converted to nanotime by: - * - * nanotime = (((tsc - step_tsc)*scale) >> shift) + step_ns - * - * In general, (tsc - step_tsc) is a 64-bit quantity with the scaling - * involving a 96-bit intermediate value. 
However, by saving the converted - * values at each tick (or at any intervening speed-step) - base_tsc and - * base_ns - we can perform conversions relative to these and be assured that - * (tsc - tick_tsc) is 32-bits. Hence: + * - { tsc_base, ns_base } pair of corresponding timestamps. * - * fast_nanotime = (((tsc - base_tsc)*scale) >> shift) + base_ns + * The tuple {tsc_base, ns_base, scale, shift} is exported in the commpage + * for the userspace nanotime routine to read. * - * The tuple {base_tsc, base_ns, scale, shift} is exported in the commpage - * for the userspace nanotime routine to read. A duplicate check_tsc is - * appended so that the consistency of the read can be verified. Note that - * this scheme is essential for MP systems in which the commpage is updated - * by the master cpu but may be read concurrently by other cpus. - * + * All of the routines which update the nanotime data are non-reentrant. This must + * be guaranteed by the caller. */ static inline void rtc_nanotime_set_commpage(rtc_nanotime_t *rntp) { - commpage_nanotime_t cp_nanotime; - - /* Only the master cpu updates the commpage */ - if (cpu_number() != master_cpu) - return; + commpage_set_nanotime(rntp->tsc_base, rntp->ns_base, rntp->scale, rntp->shift); +} - cp_nanotime.nt_base_tsc = rntp->rnt_tsc; - cp_nanotime.nt_base_ns = rntp->rnt_nanos; - cp_nanotime.nt_scale = rntp->rnt_scale; - cp_nanotime.nt_shift = rntp->rnt_shift; +/* + * rtc_nanotime_init: + * + * Initialize the nanotime info from the base time. Since + * the base value might be from a lower resolution clock, + * we compare it to the TSC derived value, and use the + * greater of the two values.
+ */ +static inline void +_rtc_nanotime_init(rtc_nanotime_t *rntp, uint64_t base) +{ + uint64_t nsecs, tsc = rdtsc64(); - commpage_set_nanotime(&cp_nanotime); + nsecs = _tsc_to_nanoseconds(tsc); + rtc_nanotime_store(tsc, MAX(nsecs, base), rntp->scale, rntp->shift, rntp); } static void -rtc_nanotime_init(void) +rtc_nanotime_init(uint64_t base) { - rtc_nanotime_t *rntp = &current_cpu_datap()->cpu_rtc_nanotime; - rtc_nanotime_t *master_rntp = &cpu_datap(master_cpu)->cpu_rtc_nanotime; + rtc_nanotime_t *rntp = &rtc_nanotime_info; - if (cpu_number() == master_cpu) { - rntp->rnt_tsc = rdtsc64(); - rntp->rnt_nanos = tsc_to_nanoseconds(rntp->rnt_tsc); - rntp->rnt_scale = rtc_quant_scale; - rntp->rnt_shift = rtc_quant_shift; - rntp->rnt_step_tsc = 0ULL; - rntp->rnt_step_nanos = 0ULL; - } else { - /* - * Copy master processor's nanotime info. - * Loop required in case this changes while copying. - */ - do { - *rntp = *master_rntp; - } while (rntp->rnt_tsc != master_rntp->rnt_tsc); - } + _rtc_nanotime_init(rntp, base); + rtc_nanotime_set_commpage(rntp); } -static inline void -_rtc_nanotime_update(rtc_nanotime_t *rntp, uint64_t tsc) +/* + * rtc_nanotime_init_commpage: + * + * Call back from the commpage initialization to + * cause the commpage data to be filled in once the + * commpages have been created. + */ +void +rtc_nanotime_init_commpage(void) { - uint64_t tsc_delta; - uint64_t ns_delta; + spl_t s = splclock(); + + rtc_nanotime_set_commpage(&rtc_nanotime_info); - tsc_delta = tsc - rntp->rnt_step_tsc; - ns_delta = tsc_to_nanoseconds(tsc_delta); - rntp->rnt_nanos = rntp->rnt_step_nanos + ns_delta; - rntp->rnt_tsc = tsc; + splx(s); } -static void -rtc_nanotime_update(void) +/* + * rtc_nanotime_update: + * + * Update the nanotime info from the base time. Since + * the base value might be from a lower resolution clock, + * we compare it to the TSC derived value, and use the + * greater of the two values. + * + * N.B.
In comparison to the above init routine, this assumes + * that the TSC has remained monotonic compared to the tsc_base + * value, which is not the case after S3 sleep. + */ +static inline void +_rtc_nanotime_update(rtc_nanotime_t *rntp, uint64_t base) { - rtc_nanotime_t *rntp = &current_cpu_datap()->cpu_rtc_nanotime; + uint64_t nsecs, tsc = rdtsc64(); - assert(get_preemption_level() > 0); - assert(!ml_get_interrupts_enabled()); - - _rtc_nanotime_update(rntp, rdtsc64()); - rtc_nanotime_set_commpage(rntp); + nsecs = rntp->ns_base + _tsc_to_nanoseconds(tsc - rntp->tsc_base); + rtc_nanotime_store(tsc, MAX(nsecs, base), rntp->scale, rntp->shift, rntp); } static void -rtc_nanotime_scale_update(void) +rtc_nanotime_update( + uint64_t base) { - rtc_nanotime_t *rntp = &current_cpu_datap()->cpu_rtc_nanotime; - uint64_t tsc = rdtsc64(); + rtc_nanotime_t *rntp = &rtc_nanotime_info; assert(!ml_get_interrupts_enabled()); - /* - * Update time based on past scale. - */ - _rtc_nanotime_update(rntp, tsc); - - /* - * Update scale and timestamp this update. - */ - rntp->rnt_scale = rtc_quant_scale; - rntp->rnt_shift = rtc_quant_shift; - rntp->rnt_step_tsc = rntp->rnt_tsc; - rntp->rnt_step_nanos = rntp->rnt_nanos; - - /* Export update to userland */ + _rtc_nanotime_update(rntp, base); rtc_nanotime_set_commpage(rntp); } +/* + * rtc_nanotime_read: + * + * Returns the current nanotime value, accessible from any + * context.
+ */ static uint64_t -_rtc_nanotime_read(void) -{ - rtc_nanotime_t *rntp = &current_cpu_datap()->cpu_rtc_nanotime; - uint64_t rnt_tsc; - uint32_t rnt_scale; - uint32_t rnt_shift; - uint64_t rnt_nanos; - uint64_t tsc; - uint64_t tsc_delta; - - rnt_scale = rntp->rnt_scale; - if (rnt_scale == 0) - return 0ULL; - - rnt_shift = rntp->rnt_shift; - rnt_nanos = rntp->rnt_nanos; - rnt_tsc = rntp->rnt_tsc; - tsc = rdtsc64(); - - tsc_delta = tsc - rnt_tsc; - if ((tsc_delta >> 32) != 0) - return rnt_nanos + tsc_to_nanoseconds(tsc_delta); - - /* Let the compiler optimize(?): */ - if (rnt_shift == 32) - return rnt_nanos + ((tsc_delta * rnt_scale) >> 32); - else - return rnt_nanos + ((tsc_delta * rnt_scale) >> rnt_shift); -} - -uint64_t rtc_nanotime_read(void) { - uint64_t result; - uint64_t rnt_tsc; - rtc_nanotime_t *rntp = &current_cpu_datap()->cpu_rtc_nanotime; + rtc_nanotime_t rnt, *rntp = &rtc_nanotime_info; + uint64_t result; - /* - * Use timestamp to ensure the uptime record isn't changed. - * This avoids disabling interrupts. - * And not this is a per-cpu structure hence no locking. - */ do { - rnt_tsc = rntp->rnt_tsc; - result = _rtc_nanotime_read(); - } while (rnt_tsc != rntp->rnt_tsc); + rtc_nanotime_load(rntp, &rnt); + result = rnt.ns_base + _tsc_to_nanoseconds(rdtsc64() - rnt.tsc_base); + } while (rntp->tsc_base != rnt.tsc_base); - return result; + return (result); } - /* - * This function is called by the speed-step driver when a - * change of cpu clock frequency is about to occur. - * The scale is not changed until rtc_clock_stepped() is called. - * Between these times there is an uncertainty is exactly when - * the change takes effect. FIXME: by using another timing source - * we could eliminate this error. + * rtc_clock_napped: + * + * Invoked from power management when we have awoken from a nap (C3/C4) + * during which the TSC lost counts. The nanotime data is updated according + * to the provided nanosecond base value. + * + * The caller must guarantee non-reentrancy.
*/ +void +rtc_clock_napped( + uint64_t base) +{ + rtc_nanotime_update(base); +} + void rtc_clock_stepping(__unused uint32_t new_frequency, __unused uint32_t old_frequency) { - boolean_t istate; - - istate = ml_set_interrupts_enabled(FALSE); - rtc_nanotime_scale_update(); - ml_set_interrupts_enabled(istate); + panic("rtc_clock_stepping unsupported"); } -/* - * This function is called by the speed-step driver when a - * change of cpu clock frequency has just occured. This change - * is expressed as a ratio relative to the boot clock rate. - */ void -rtc_clock_stepped(uint32_t new_frequency, uint32_t old_frequency) +rtc_clock_stepped(__unused uint32_t new_frequency, + __unused uint32_t old_frequency) { - boolean_t istate; - - istate = ml_set_interrupts_enabled(FALSE); - if (rtc_boot_frequency == 0) { - /* - * At the first ever stepping, old frequency is the real - * initial clock rate. This step and all others are based - * relative to this initial frequency at which the tsc - * calibration was made. Hence we must remember this base - * frequency as reference. - */ - rtc_boot_frequency = old_frequency; - } - rtc_set_cyc_per_sec(rtc_cycle_count * new_frequency / - rtc_boot_frequency); - rtc_nanotime_scale_update(); - ml_set_interrupts_enabled(istate); + panic("rtc_clock_stepping unsupported"); } /* - * rtc_sleep_wakeup() is called from acpi on awakening from a S3 sleep + * rtc_sleep_wakeup: + * + * Invoked from power management when we have awoken from a sleep (S3) + * and the TSC has been reset. The nanotime data is updated based on + * the HPET value. + * + * The caller must guarantee non-reentrancy. */ void rtc_sleep_wakeup(void) { - rtc_nanotime_t *rntp = &current_cpu_datap()->cpu_rtc_nanotime; - - boolean_t istate; + boolean_t istate; istate = ml_set_interrupts_enabled(FALSE); @@ -853,12 +352,8 @@ rtc_sleep_wakeup(void) * Reset nanotime. * The timestamp counter will have been reset * but nanotime (uptime) marches onward.
- * We assume that we're still at the former cpu frequency. */ - rntp->rnt_tsc = rdtsc64(); - rntp->rnt_step_tsc = 0ULL; - rntp->rnt_step_nanos = rntp->rnt_nanos; - rtc_nanotime_set_commpage(rntp); + rtc_nanotime_init(tmrCvt(rdHPET(), hpetCvtt2n)); /* Restart tick interrupts from the LAPIC timer */ rtc_lapic_start_ticking(); @@ -871,100 +366,68 @@ rtc_sleep_wakeup(void) * In addition, various variables used to support the clock are initialized. */ int -sysclk_init(void) +rtclock_init(void) { uint64_t cycles; - mp_disable_preemption(); + assert(!ml_get_interrupts_enabled()); + if (cpu_number() == master_cpu) { + + assert(tscFreq); + rtc_set_timescale(tscFreq); + /* - * Perform calibration. - * The PIT is used as the reference to compute how many - * TCS counts (cpu clock cycles) occur per second. + * Adjust and set the exported cpu speed. */ - rtc_cycle_count = timeRDTSC(); - cycles = rtc_set_cyc_per_sec(rtc_cycle_count); + cycles = rtc_export_speed(tscFreq); /* * Set min/max to actual. * ACPI may update these later if speed-stepping is detected. */ - gPEClockFrequencyInfo.cpu_frequency_min_hz = cycles; - gPEClockFrequencyInfo.cpu_frequency_max_hz = cycles; - printf("[RTCLOCK] frequency %llu (%llu)\n", - cycles, rtc_cyc_per_sec); + gPEClockFrequencyInfo.cpu_frequency_min_hz = cycles; + gPEClockFrequencyInfo.cpu_frequency_max_hz = cycles; - rtc_lapic_timer_calibrate(); + /* + * Compute the longest interval we can represent. 
+ */ + maxDec = tmrCvt(0x7fffffffULL, busFCvtt2n); + kprintf("maxDec: %lld\n", maxDec); /* Minimum interval is 1usec */ - rtc_decrementer_min = deadline_to_decrementer(NSEC_PER_USEC, - 0ULL); + rtc_decrementer_min = deadline_to_decrementer(NSEC_PER_USEC, 0ULL); /* Point LAPIC interrupts to hardclock() */ lapic_set_timer_func((i386_intr_func_t) rtclock_intr); clock_timebase_init(); - rtc_initialized = TRUE; + ml_init_lock_timeout(); } - rtc_nanotime_init(); - rtc_lapic_start_ticking(); - mp_enable_preemption(); - return (1); } -/* - * Get the clock device time. This routine is responsible - * for converting the device's machine dependent time value - * into a canonical mach_timespec_t value. - */ -static kern_return_t -sysclk_gettime_internal( - mach_timespec_t *cur_time) /* OUT */ -{ - *cur_time = tsc_to_timespec(); - return (KERN_SUCCESS); -} +// utility routine +// Code to calculate how many processor cycles are in a second... -kern_return_t -sysclk_gettime( - mach_timespec_t *cur_time) /* OUT */ +static void +rtc_set_timescale(uint64_t cycles) { - return sysclk_gettime_internal(cur_time); -} + rtc_nanotime_info.scale = ((uint64_t)NSEC_PER_SEC << 32) / cycles; + rtc_nanotime_info.shift = 32; -void -sysclk_gettime_interrupts_disabled( - mach_timespec_t *cur_time) /* OUT */ -{ - (void) sysclk_gettime_internal(cur_time); + rtc_nanotime_init(0); } -// utility routine -// Code to calculate how many processor cycles are in a second... 
- static uint64_t -rtc_set_cyc_per_sec(uint64_t cycles) +rtc_export_speed(uint64_t cyc_per_sec) { + uint64_t cycles; - if (cycles > (NSEC_PER_SEC/20)) { - // we can use just a "fast" multiply to get nanos - rtc_quant_shift = 32; - rtc_quant_scale = create_mul_quant_GHZ(rtc_quant_shift, cycles); - rtclock.timebase_const.numer = rtc_quant_scale; // timeRDTSC is 1/20 - rtclock.timebase_const.denom = RTC_FAST_DENOM; - } else { - rtc_quant_shift = 26; - rtc_quant_scale = create_mul_quant_GHZ(rtc_quant_shift, cycles); - rtclock.timebase_const.numer = NSEC_PER_SEC/20; // timeRDTSC is 1/20 - rtclock.timebase_const.denom = cycles; - } - rtc_cyc_per_sec = cycles*20; // multiply it by 20 and we are done.. - // BUT we also want to calculate... - - cycles = ((rtc_cyc_per_sec + (UI_CPUFREQ_ROUNDING_FACTOR/2)) + /* Round: */ + cycles = ((cyc_per_sec + (UI_CPUFREQ_ROUNDING_FACTOR/2)) / UI_CPUFREQ_ROUNDING_FACTOR) * UI_CPUFREQ_ROUNDING_FACTOR; @@ -978,7 +441,7 @@ rtc_set_cyc_per_sec(uint64_t cycles) } gPEClockFrequencyInfo.cpu_frequency_hz = cycles; - kprintf("[RTCLOCK] frequency %llu (%llu)\n", cycles, rtc_cyc_per_sec); + kprintf("[RTCLOCK] frequency %llu (%llu)\n", cycles, cyc_per_sec); return(cycles); } @@ -987,12 +450,17 @@ clock_get_system_microtime( uint32_t *secs, uint32_t *microsecs) { - mach_timespec_t now; - - (void) sysclk_gettime_internal(&now); + uint64_t now = rtc_nanotime_read(); + uint32_t remain; - *secs = now.tv_sec; - *microsecs = now.tv_nsec / NSEC_PER_USEC; + asm volatile( + "divl %3" + : "=a" (*secs), "=d" (remain) + : "A" (now), "r" (NSEC_PER_SEC)); + asm volatile( + "divl %3" + : "=a" (*microsecs) + : "0" (remain), "d" (0), "r" (NSEC_PER_USEC)); } void @@ -1000,291 +468,39 @@ clock_get_system_nanotime( uint32_t *secs, uint32_t *nanosecs) { - mach_timespec_t now; - - (void) sysclk_gettime_internal(&now); - - *secs = now.tv_sec; - *nanosecs = now.tv_nsec; -} - -/* - * Get clock device attributes. 
- */ -kern_return_t -sysclk_getattr( - clock_flavor_t flavor, - clock_attr_t attr, /* OUT */ - mach_msg_type_number_t *count) /* IN/OUT */ -{ - if (*count != 1) - return (KERN_FAILURE); - switch (flavor) { - - case CLOCK_GET_TIME_RES: /* >0 res */ - *(clock_res_t *) attr = rtc_intr_nsec; - break; - - case CLOCK_ALARM_CURRES: /* =0 no alarm */ - case CLOCK_ALARM_MAXRES: - case CLOCK_ALARM_MINRES: - *(clock_res_t *) attr = 0; - break; - - default: - return (KERN_INVALID_VALUE); - } - return (KERN_SUCCESS); -} - -/* - * Set next alarm time for the clock device. This call - * always resets the time to deliver an alarm for the - * clock. - */ -void -sysclk_setalarm( - mach_timespec_t *alarm_time) -{ - timer_call_enter(&rtclock_alarm_timer, - (uint64_t) alarm_time->tv_sec * NSEC_PER_SEC - + alarm_time->tv_nsec); -} + uint64_t now = rtc_nanotime_read(); -/* - * Configure the calendar clock. - */ -int -calend_config(void) -{ - return bbc_config(); -} - -/* - * Initialize calendar clock. - */ -int -calend_init(void) -{ - return (1); -} - -/* - * Get the current clock time. 
- */ -kern_return_t -calend_gettime( - mach_timespec_t *cur_time) /* OUT */ -{ - spl_t s; - - RTC_LOCK(s); - if (!rtclock.calend_is_set) { - RTC_UNLOCK(s); - return (KERN_FAILURE); - } - - (void) sysclk_gettime_internal(cur_time); - ADD_MACH_TIMESPEC(cur_time, &rtclock.calend_offset); - RTC_UNLOCK(s); - - return (KERN_SUCCESS); -} - -void -clock_get_calendar_microtime( - uint32_t *secs, - uint32_t *microsecs) -{ - mach_timespec_t now; - - calend_gettime(&now); - - *secs = now.tv_sec; - *microsecs = now.tv_nsec / NSEC_PER_USEC; -} - -void -clock_get_calendar_nanotime( - uint32_t *secs, - uint32_t *nanosecs) -{ - mach_timespec_t now; - - calend_gettime(&now); - - *secs = now.tv_sec; - *nanosecs = now.tv_nsec; + asm volatile( + "divl %3" + : "=a" (*secs), "=d" (*nanosecs) + : "A" (now), "r" (NSEC_PER_SEC)); } void -clock_set_calendar_microtime( - uint32_t secs, - uint32_t microsecs) -{ - mach_timespec_t new_time, curr_time; - uint32_t old_offset; - spl_t s; - - new_time.tv_sec = secs; - new_time.tv_nsec = microsecs * NSEC_PER_USEC; - - RTC_LOCK(s); - old_offset = rtclock.calend_offset.tv_sec; - (void) sysclk_gettime_internal(&curr_time); - rtclock.calend_offset = new_time; - SUB_MACH_TIMESPEC(&rtclock.calend_offset, &curr_time); - rtclock.boottime += rtclock.calend_offset.tv_sec - old_offset; - rtclock.calend_is_set = TRUE; - RTC_UNLOCK(s); - - (void) bbc_settime(&new_time); - - host_notify_calendar_change(); -} - -/* - * Get clock device attributes. 
- */ -kern_return_t -calend_getattr( - clock_flavor_t flavor, - clock_attr_t attr, /* OUT */ - mach_msg_type_number_t *count) /* IN/OUT */ -{ - if (*count != 1) - return (KERN_FAILURE); - switch (flavor) { - - case CLOCK_GET_TIME_RES: /* >0 res */ - *(clock_res_t *) attr = rtc_intr_nsec; - break; - - case CLOCK_ALARM_CURRES: /* =0 no alarm */ - case CLOCK_ALARM_MINRES: - case CLOCK_ALARM_MAXRES: - *(clock_res_t *) attr = 0; - break; - - default: - return (KERN_INVALID_VALUE); - } - return (KERN_SUCCESS); -} - -#define tickadj (40*NSEC_PER_USEC) /* "standard" skew, ns / tick */ -#define bigadj (NSEC_PER_SEC) /* use 10x skew above bigadj ns */ - -uint32_t -clock_set_calendar_adjtime( - int32_t *secs, - int32_t *microsecs) -{ - int64_t total, ototal; - uint32_t interval = 0; - spl_t s; - - total = (int64_t)*secs * NSEC_PER_SEC + *microsecs * NSEC_PER_USEC; - - RTC_LOCK(s); - ototal = rtclock.calend_adjtotal; - - if (total != 0) { - int32_t delta = tickadj; - - if (total > 0) { - if (total > bigadj) - delta *= 10; - if (delta > total) - delta = total; - } - else { - if (total < -bigadj) - delta *= 10; - delta = -delta; - if (delta < total) - delta = total; - } - - rtclock.calend_adjtotal = total; - rtclock.calend_adjdelta = delta; - - interval = NSEC_PER_HZ; - } - else - rtclock.calend_adjdelta = rtclock.calend_adjtotal = 0; - - RTC_UNLOCK(s); - - if (ototal == 0) - *secs = *microsecs = 0; - else { - *secs = ototal / NSEC_PER_SEC; - *microsecs = ototal % NSEC_PER_SEC; - } - - return (interval); -} - -uint32_t -clock_adjust_calendar(void) -{ - uint32_t interval = 0; - int32_t delta; - spl_t s; - - RTC_LOCK(s); - delta = rtclock.calend_adjdelta; - ADD_MACH_TIMESPEC_NSEC(&rtclock.calend_offset, delta); - - rtclock.calend_adjtotal -= delta; - - if (delta > 0) { - if (delta > rtclock.calend_adjtotal) - rtclock.calend_adjdelta = rtclock.calend_adjtotal; - } - else - if (delta < 0) { - if (delta < rtclock.calend_adjtotal) - rtclock.calend_adjdelta = rtclock.calend_adjtotal; - 
} - - if (rtclock.calend_adjdelta != 0) - interval = NSEC_PER_HZ; +clock_gettimeofday_set_commpage( + uint64_t abstime, + uint64_t epoch, + uint64_t offset, + uint32_t *secs, + uint32_t *microsecs) +{ + uint64_t now = abstime; + uint32_t remain; - RTC_UNLOCK(s); + now += offset; - return (interval); -} + asm volatile( + "divl %3" + : "=a" (*secs), "=d" (remain) + : "A" (now), "r" (NSEC_PER_SEC)); + asm volatile( + "divl %3" + : "=a" (*microsecs) + : "0" (remain), "d" (0), "r" (NSEC_PER_USEC)); -void -clock_initialize_calendar(void) -{ - mach_timespec_t bbc_time, curr_time; - spl_t s; - - if (bbc_gettime(&bbc_time) != KERN_SUCCESS) - return; - - RTC_LOCK(s); - if (rtclock.boottime == 0) - rtclock.boottime = bbc_time.tv_sec; - (void) sysclk_gettime_internal(&curr_time); - rtclock.calend_offset = bbc_time; - SUB_MACH_TIMESPEC(&rtclock.calend_offset, &curr_time); - rtclock.calend_is_set = TRUE; - RTC_UNLOCK(s); - - host_notify_calendar_change(); -} + *secs += epoch; -void -clock_get_boottime_nanotime( - uint32_t *secs, - uint32_t *nanosecs) -{ - *secs = rtclock.boottime; - *nanosecs = 0; + commpage_set_timestamp(abstime - remain, *secs, NSEC_PER_SEC); } void @@ -1294,37 +510,6 @@ clock_timebase_info( info->numer = info->denom = 1; } -void -clock_set_timer_deadline( - uint64_t deadline) -{ - spl_t s; - cpu_data_t *pp = current_cpu_datap(); - rtclock_timer_t *mytimer = &pp->cpu_rtc_timer; - uint64_t abstime; - uint64_t decr; - - assert(get_preemption_level() > 0); - assert(rtclock_timer_expire); - - RTC_INTRS_OFF(s); - mytimer->deadline = deadline; - mytimer->is_set = TRUE; - if (!mytimer->has_expired) { - abstime = mach_absolute_time(); - if (mytimer->deadline < pp->cpu_rtc_tick_deadline) { - decr = deadline_to_decrementer(mytimer->deadline, - abstime); - rtc_lapic_set_timer(decr); - pp->cpu_rtc_intr_deadline = mytimer->deadline; - KERNEL_DEBUG_CONSTANT( - MACHDBG_CODE(DBG_MACH_EXCP_DECI, 1) | - DBG_FUNC_NONE, decr, 2, 0, 0, 0); - } - } - RTC_INTRS_ON(s); -} - void 
clock_set_timer_func( clock_timer_func_t func) @@ -1337,87 +522,83 @@ clock_set_timer_func( * Real-time clock device interrupt. */ void -rtclock_intr(struct i386_interrupt_state *regs) +rtclock_intr( + x86_saved_state_t *tregs) { + uint64_t rip; + boolean_t user_mode = FALSE; uint64_t abstime; uint32_t latency; - uint64_t decr; - uint64_t decr_tick; - uint64_t decr_timer; cpu_data_t *pp = current_cpu_datap(); - rtclock_timer_t *mytimer = &pp->cpu_rtc_timer; assert(get_preemption_level() > 0); assert(!ml_get_interrupts_enabled()); - abstime = _rtc_nanotime_read(); - latency = (uint32_t) abstime - pp->cpu_rtc_intr_deadline; - if (pp->cpu_rtc_tick_deadline <= abstime) { - rtc_nanotime_update(); - clock_deadline_for_periodic_event( - NSEC_PER_HZ, abstime, &pp->cpu_rtc_tick_deadline); - hertz_tick( -#if STAT_TIME - NSEC_PER_HZ, -#endif - (regs->efl & EFL_VM) || ((regs->cs & 0x03) != 0), - regs->eip); - } + abstime = rtc_nanotime_read(); + latency = (uint32_t) abstime - pp->rtcPop; + + if (is_saved_state64(tregs) == TRUE) { + x86_saved_state64_t *regs; + + regs = saved_state64(tregs); + + user_mode = TRUE; + rip = regs->isf.rip; + } else { + x86_saved_state32_t *regs; - abstime = _rtc_nanotime_read(); - if (mytimer->is_set && mytimer->deadline <= abstime) { - mytimer->has_expired = TRUE; - mytimer->is_set = FALSE; - (*rtclock_timer_expire)(abstime); - assert(!ml_get_interrupts_enabled()); - mytimer->has_expired = FALSE; + regs = saved_state32(tregs); + + if (regs->cs & 0x03) + user_mode = TRUE; + rip = regs->eip; } /* Log the interrupt service latency (-ve value expected by tool) */ KERNEL_DEBUG_CONSTANT( MACHDBG_CODE(DBG_MACH_EXCP_DECI, 0) | DBG_FUNC_NONE, - -latency, (uint32_t)regs->eip, 0, 0, 0); + -latency, (uint32_t)rip, user_mode, 0, 0); - abstime = _rtc_nanotime_read(); - decr_tick = deadline_to_decrementer(pp->cpu_rtc_tick_deadline, abstime); - decr_timer = (mytimer->is_set) ? 
- deadline_to_decrementer(mytimer->deadline, abstime) : - DECREMENTER_MAX; - decr = MIN(decr_tick, decr_timer); - pp->cpu_rtc_intr_deadline = abstime + decr; + /* call the generic etimer */ + etimer_intr(user_mode, rip); +} - rtc_lapic_set_timer(decr); +/* + * Request timer pop from the hardware + */ - /* Log the new decrementer value */ - KERNEL_DEBUG_CONSTANT( - MACHDBG_CODE(DBG_MACH_EXCP_DECI, 1) | DBG_FUNC_NONE, - decr, 3, 0, 0, 0); +int +setPop( + uint64_t time) +{ + uint64_t now; + uint32_t decr; + uint64_t count; + + now = rtc_nanotime_read(); /* The time in nanoseconds */ + decr = deadline_to_decrementer(time, now); + + count = tmrCvt(decr, busFCvtn2t); + lapic_set_timer(TRUE, one_shot, divide_by_1, (uint32_t) count); + return decr; /* Pass back what we set */ } -static void -rtclock_alarm_expire( - __unused timer_call_param_t p0, - __unused timer_call_param_t p1) -{ - mach_timespec_t clock_time; - (void) sysclk_gettime_internal(&clock_time); - clock_alarm_intr(SYSTEM_CLOCK, &clock_time); +uint64_t +mach_absolute_time(void) +{ + return rtc_nanotime_read(); } void -clock_get_uptime( +clock_interval_to_absolutetime_interval( + uint32_t interval, + uint32_t scale_factor, uint64_t *result) { - *result = rtc_nanotime_read(); -} - -uint64_t -mach_absolute_time(void) -{ - return rtc_nanotime_read(); + *result = (uint64_t)interval * scale_factor; } void @@ -1439,37 +620,24 @@ absolutetime_to_microtime( } void -clock_interval_to_deadline( - uint32_t interval, - uint32_t scale_factor, - uint64_t *result) -{ - uint64_t abstime; - - clock_get_uptime(result); - - clock_interval_to_absolutetime_interval(interval, scale_factor, &abstime); - - *result += abstime; -} - -void -clock_interval_to_absolutetime_interval( - uint32_t interval, - uint32_t scale_factor, - uint64_t *result) +absolutetime_to_nanotime( + uint64_t abstime, + uint32_t *secs, + uint32_t *nanosecs) { - *result = (uint64_t)interval * scale_factor; + asm volatile( + "divl %3" + : "=a" (*secs), "=d" 
(*nanosecs) + : "A" (abstime), "r" (NSEC_PER_SEC)); } void -clock_absolutetime_interval_to_deadline( - uint64_t abstime, - uint64_t *result) +nanotime_to_absolutetime( + uint32_t secs, + uint32_t nanosecs, + uint64_t *result) { - clock_get_uptime(result); - - *result += abstime; + *result = ((uint64_t)secs * NSEC_PER_SEC) + nanosecs; } void diff --git a/iokit/Drivers/platform/drvAppleIntelClock/IntelClock.cpp b/osfmk/i386/rtclock.h similarity index 63% rename from iokit/Drivers/platform/drvAppleIntelClock/IntelClock.cpp rename to osfmk/i386/rtclock.h index c84abc4ce..c61a55884 100644 --- a/iokit/Drivers/platform/drvAppleIntelClock/IntelClock.cpp +++ b/osfmk/i386/rtclock.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 1998-2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2004-2005 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -19,26 +19,23 @@ * * @APPLE_LICENSE_HEADER_END@ */ -// -// Backdoor hack for Intel Clock. -// -// +/* + * @OSF_COPYRIGHT@ + */ +/* + * @APPLE_FREE_COPYRIGHT@ + */ +/* + * File: rtclock.h + * Purpose: Routines for handling the machine dependent + * real-time clock. + */ -#include "AppleIntelClock.h" +#ifndef _I386_RTCLOCK_H_ +#define _I386_RTCLOCK_H_ -#define super IOService -OSDefineMetaClassAndStructors(AppleIntelClock, IOService); +#include -bool -AppleIntelClock::start(IOService *provider) -{ - if (!super::start(provider)) - return false; +struct cpu_data; - /* - * The clock is already provided by the kernel, so all we need - * here is publish its availability for any IOKit client to use. 
- */ - publishResource("IORTC", this); - return true; -} +#endif /* _I386_RTCLOCK_H_ */ diff --git a/osfmk/i386/seg.h b/osfmk/i386/seg.h index b2b83c246..eca69d6f3 100644 --- a/osfmk/i386/seg.h +++ b/osfmk/i386/seg.h @@ -55,6 +55,7 @@ #include #include +#include #include /* @@ -87,21 +88,14 @@ selector_to_sel(uint16_t selector) return (tconv.sel); } -#define LDTSZ 15 /* size of the kernel ldt in entries*/ +#define LDTSZ 8192 /* size of the kernel ldt in entries */ +#define LDTSZ_MIN 17 /* kernel ldt entries used by the system */ #if MACH_KDB -#ifdef MACH_BSD -#define GDTSZ 14 +#define GDTSZ 19 #else -#define GDTSZ 11 +#define GDTSZ 18 #endif -#else /* MACH_KDB */ -#ifdef MACH_BSD -#define GDTSZ 13 -#else -#define GDTSZ 10 -#endif -#endif /* MACH_KDB */ /* * Interrupt table is always 256 entries long. @@ -116,7 +110,7 @@ selector_to_sel(uint16_t selector) * Real segment descriptor. */ struct real_descriptor { - unsigned int limit_low:16, /* limit 0..15 */ + uint32_t limit_low:16, /* limit 0..15 */ base_low:16, /* base 0..15 */ base_med:8, /* base 16..23 */ access:8, /* access byte */ @@ -124,14 +118,34 @@ struct real_descriptor { granularity:4, /* granularity */ base_high:8; /* base 24..31 */ }; - +struct real_descriptor64 { + uint32_t limit_low16:16, /* limit 0..15 */ + base_low16:16, /* base 0..15 */ + base_med8:8, /* base 16..23 */ + access8:8, /* access byte */ + limit_high4:4, /* limit 16..19 */ + granularity4:4, /* granularity */ + base_high8:8, /* base 24..31 */ + base_top32:32, /* base 32..63 */ + reserved32:32; /* reserved/zero */ +}; struct real_gate { - unsigned int offset_low:16, /* offset 0..15 */ + uint32_t offset_low:16, /* offset 0..15 */ selector:16, word_count:8, access:8, offset_high:16; /* offset 16..31 */ }; +struct real_gate64 { + uint32_t offset_low16:16, /* offset 0..15 */ + selector16:16, + IST:3, + zeroes5:5, + access8:8, + offset_high16:16, /* offset 16..31 */ + offset_top32:32, /* offset 32..63 */ + reserved32:32; /* reserved/zero */ +}; 
/* * We build descriptors and gates in a 'fake' format to let the @@ -139,28 +153,56 @@ struct real_gate { * at runtime. */ struct fake_descriptor { - unsigned int offset:32; /* offset */ - unsigned int lim_or_seg:20; /* limit */ + uint32_t offset:32; /* offset */ + uint32_t lim_or_seg:20; /* limit */ /* or segment, for gate */ - unsigned int size_or_wdct:4; /* size/granularity */ + uint32_t size_or_wdct:4; /* size/granularity */ /* word count, for gate */ - unsigned int access:8; /* access */ + uint32_t access:8; /* access */ }; +struct fake_descriptor64 { + uint32_t offset[2]; /* offset [0..31,32..63] */ + uint32_t lim_or_seg:20; /* limit */ + /* or segment, for gate */ + uint32_t size_or_IST:4; /* size/granularity */ + /* IST for gates */ + uint32_t access:8; /* access */ + uint32_t reserved:32; /* reserved/zero */ +}; +#define FAKE_UBER64(addr32) { (uint32_t) (addr32), KERNEL_UBER_BASE_HI32 } +#define FAKE_COMPAT(addr32) { (uint32_t) (addr32), 0x0 } +#define UBER64(addr32) ((addr64_t) addr32 + KERNEL_UBER_BASE) /* * Boot-time data for master (or only) CPU */ -extern struct fake_descriptor idt[IDTSZ]; -extern struct fake_descriptor gdt[GDTSZ]; -extern struct fake_descriptor ldt[LDTSZ]; -extern struct i386_tss ktss; +extern struct fake_descriptor master_idt[IDTSZ]; +extern struct fake_descriptor master_gdt[GDTSZ]; +extern struct fake_descriptor master_ldt[LDTSZ]; +extern struct i386_tss master_ktss; +extern struct sysenter_stack master_sstk; + +extern struct fake_descriptor64 master_idt64[IDTSZ]; +extern struct fake_descriptor64 kernel_ldt_desc64; +extern struct fake_descriptor64 kernel_tss_desc64; +extern struct x86_64_tss master_ktss64; __BEGIN_DECLS +extern char df_task_stack[]; +extern char df_task_stack_end[]; +extern struct i386_tss master_dftss; +extern void df_task_start(void); + +extern char mc_task_stack[]; +extern char mc_task_stack_end[]; +extern struct i386_tss master_mctss; +extern void mc_task_start(void); + #if MACH_KDB extern char 
db_stack_store[]; extern char db_task_stack_store[]; -extern struct i386_tss dbtss; +extern struct i386_tss master_dbtss; extern void db_task_start(void); #endif /* MACH_KDB */ @@ -168,6 +210,7 @@ __END_DECLS #endif /*__ASSEMBLER__*/ +#define SZ_64 0x2 /* 64-bit segment */ #define SZ_32 0x4 /* 32-bit segment */ #define SZ_G 0x8 /* 4K limit field */ @@ -215,18 +258,27 @@ __END_DECLS * Convert selector to descriptor table index. */ #define sel_idx(sel) (selector_to_sel(sel).index) +#define SEL_TO_INDEX(s) ((s)>>3) #define NULL_SEG 0 /* * User descriptors for MACH - 32-bit flat address space */ -#define USER_SCALL 0x07 /* system call gate */ -#define USER_RPC 0x0f /* mach rpc call gate */ -#define USER_CS 0x17 /* user code segment */ -#define USER_DS 0x1f /* user data segment */ -#define USER_CTHREAD 0x27 /* user cthread area */ -#define USER_SETTABLE 0x2f /* start of user settable ldt entries */ +#define SYSENTER_CS 0x07 /* sysenter kernel code segment */ +#define SYSENTER_DS 0x0f /* sysenter kernel data segment */ +#define USER_CS 0x17 /* user code segment + Must be SYSENTER_CS+16 for sysexit */ +/* Special case: sysenter with EFL_TF (trace bit) set - use iret not sysexit */ +#define SYSENTER_TF_CS (USER_CS|0x10000) +#define USER_DS 0x1f /* user data segment + Must be SYSENTER_CS+24 for sysexit */ +#define USER64_CS 0x27 /* 64-bit user code segment + Must be USER_CS+16 for sysret */ +#define USER64_DS USER_DS /* 64-bit user data segment == 32-bit */ +#define SYSCALL_CS 0x2f /* 64-bit syscall pseudo-segment */ +#define USER_CTHREAD 0x37 /* user cthread area */ +#define USER_SETTABLE 0x3f /* start of user settable ldt entries */ #define USLDTSZ 10 /* number of user settable entries */ /* @@ -235,29 +287,41 @@ __END_DECLS #define KERNEL_CS 0x08 /* kernel code */ #define KERNEL_DS 0x10 /* kernel data */ #define KERNEL_LDT 0x18 /* master LDT */ -#define KERNEL_TSS 0x20 /* master TSS (uniprocessor) */ -#ifdef MACH_BSD -#define BSD_SCALL_SEL 0x28 /* BSD System calls */ 
-#define MK25_SCALL_SEL 0x30 /* MK25 System Calls */ -#define MACHDEP_SCALL_SEL 0x38 /* Machdep SYstem calls */ -#else -#define USER_LDT 0x28 /* place for per-thread LDT */ -#define USER_TSS 0x30 /* place for per-thread TSS - that holds IO bitmap */ -#define FPE_CS 0x38 /* floating-point emulator code */ -#endif -#define USER_FPREGS 0x40 /* user-mode access to saved - floating-point registers */ +#define KERNEL_LDT_2 0x20 /* master LDT expanded for 64-bit */ +#define KERNEL_TSS 0x28 /* master TSS */ +#define KERNEL_TSS_2 0x30 /* master TSS expanded for 64-bit */ + +#define MC_TSS 0x38 /* machine-check handler TSS */ + #define CPU_DATA_GS 0x48 /* per-cpu data */ -#ifdef MACH_BSD +#define DF_TSS 0x50 /* double-fault handler TSS */ + #define USER_LDT 0x58 #define USER_TSS 0x60 #define FPE_CS 0x68 -#endif + +#define USER_WINDOW_SEL 0x70 /* window for copyin/copyout */ +#define PHYS_WINDOW_SEL 0x78 /* window for copyin/copyout */ + +#define KERNEL64_CS 0x80 /* kernel 64-bit code */ +#define KERNEL64_SS 0x88 /* kernel 64-bit (syscall) stack */ #if MACH_KDB -#define DEBUG_TSS 0x50 /* debug TSS (uniprocessor) */ +#define DEBUG_TSS 0x90 /* debug TSS (uniprocessor) */ #endif +struct __gdt_desc_struct { + unsigned short size; + unsigned long address __attribute__((packed)); + unsigned short pad; +} __attribute__ ((packed)); + +struct __idt_desc_struct { + unsigned short size; + unsigned long address __attribute__((packed)); + unsigned short pad; +} __attribute__ ((packed)); + + #endif /* _I386_SEG_H_ */ diff --git a/osfmk/i386/start.s b/osfmk/i386/start.s index 830c1284b..efd9a9c3a 100644 --- a/osfmk/i386/start.s +++ b/osfmk/i386/start.s @@ -62,6 +62,7 @@ #include #include +#include /* * GAS won't handle an intersegment jump with a relocatable offset. 
@@ -73,38 +74,26 @@ -#define KVTOPHYS (-KERNELBASE) -#define KVTOLINEAR LINEAR_KERNELBASE +#define PA(addr) (addr) +#define VA(addr) (addr) - -#define PA(addr) ((addr)+KVTOPHYS) -#define VA(addr) ((addr)-KVTOPHYS) - - .data -#if 0 /* Anyone need this? */ - .align 2 - .globl EXT(_kick_buffer_) -EXT(_kick_buffer_): - .long 1 - .long 3 - .set .,.+16836 -#endif /* XXX */ /* * Interrupt and bootup stack for initial processor. */ + /* in the __HIB section since the hibernate restore code uses this stack. */ .section __HIB, __data - .align ALIGN + .align 12 - .globl EXT(intstack) -EXT(intstack): + .globl EXT(low_intstack) +EXT(low_intstack): .globl EXT(gIOHibernateRestoreStack) EXT(gIOHibernateRestoreStack): .set ., .+INTSTACK_SIZE - .globl EXT(eintstack) -EXT(eintstack:) + .globl EXT(low_eintstack) +EXT(low_eintstack:) .globl EXT(gIOHibernateRestoreStackEnd) EXT(gIOHibernateRestoreStackEnd): @@ -113,26 +102,51 @@ EXT(gIOHibernateRestoreStackEnd): */ .align ALIGN .globl EXT(gdtptr) + /* align below properly */ + .word 0 LEXT(gdtptr) .word Times(8,GDTSZ)-1 - .long EXT(gdt) + .long EXT(master_gdt) .align ALIGN .globl EXT(idtptr) + /* align below properly */ + .word 0 LEXT(idtptr) .word Times(8,IDTSZ)-1 - .long EXT(idt) + .long EXT(master_idt) - /* back to the regular __DATA section. */ + /* back to the regular __DATA section. */ .section __DATA, __data +/* + * Stack for last-gasp double-fault handler. + */ + .align 12 + .globl EXT(df_task_stack) +EXT(df_task_stack): + .set ., .+INTSTACK_SIZE + .globl EXT(df_task_stack_end) +EXT(df_task_stack_end): + + +/* + * Stack for machine-check handler. + */ + .align 12 + .globl EXT(mc_task_stack) +EXT(mc_task_stack): + .set ., .+INTSTACK_SIZE + .globl EXT(mc_task_stack_end) +EXT(mc_task_stack_end): + #if MACH_KDB /* * Kernel debugger stack for each processor. 
*/ - .align ALIGN + .align 12 .globl EXT(db_stack_store) EXT(db_stack_store): .set ., .+(INTSTACK_SIZE*MAX_CPUS) @@ -140,7 +154,7 @@ EXT(db_stack_store): /* * Stack for last-ditch debugger task for each processor. */ - .align ALIGN + .align 12 .globl EXT(db_task_stack_store) EXT(db_task_stack_store): .set ., .+(INTSTACK_SIZE*MAX_CPUS) @@ -155,54 +169,46 @@ EXT(kgdb_stack_store): #endif /* MACH_KDB */ .data - /* - * start_lock is very special. We initialize the - * lock at allocation time rather than at run-time. - * Although start_lock should be an instance of a - * hw_lock, we hand-code all manipulation of the lock - * because the hw_lock code may require function calls; - * and we'd rather not introduce another dependency on - * a working stack at this point. - */ - .globl EXT(start_lock) -EXT(start_lock): - .long 0 /* synchronizes processor startup */ - - .globl EXT(master_is_up) -EXT(master_is_up): - .long 0 /* 1 when OK for other processors */ - /* to start */ - .globl EXT(mp_boot_pde) -EXT(mp_boot_pde): - .long 0 - -_KERNend: .long 0 /* phys addr end of kernel (just after bss) */ -physfree: .long 0 /* phys addr of next free page */ - - .globl _IdlePTD -_IdlePTD: .long 0 /* phys addr of kernel PTD */ +physfree: + .long 0 /* phys addr of next free page */ + + .globl EXT(IdlePTD) +EXT(IdlePTD): + .long 0 /* phys addr of kernel PTD */ #ifdef PAE - .globl _IdlePDPT -_IdlePDPT: .long 0 /* phys addr of kernel PDPT */ + .globl EXT(IdlePDPT) +EXT(IdlePDPT): + .long 0 /* phys addr of kernel PDPT */ +#endif +#ifdef X86_64 + .globl EXT(IdlePML4) +EXT(IdlePML4): + .long 0 + .globl EXT(IdlePDPT64) +EXT(IdlePDPT64): + .long 0 #endif - .globl _KPTphys - -_KPTphys: .long 0 /* phys addr of kernel page tables */ +KPTphys: + .long 0 /* phys addr of kernel page tables */ + .globl EXT(KernelRelocOffset) +EXT(KernelRelocOffset): + .long 0 /* Kernel relocation offset */ + /* Some handy macros */ -#define ALLOCPAGES(npages) \ - movl PA(physfree), %esi ; \ - movl $((npages) * 
PAGE_SIZE), %eax ; \ - addl %esi, %eax ; \ - movl %eax, PA(physfree) ; \ - movl %esi, %edi ; \ - movl $((npages) * PAGE_SIZE / 4),%ecx ; \ - xorl %eax,%eax ; \ - cld ; \ - rep ; \ +#define ALLOCPAGES(npages) \ + movl PA(physfree), %esi ; \ + movl $((npages) * PAGE_SIZE), %eax ; \ + addl %esi, %eax ; \ + movl %eax, PA(physfree) ; \ + movl %esi, %edi ; \ + movl $((npages) * PAGE_SIZE / 4),%ecx ; \ + xorl %eax,%eax ; \ + cld ; \ + rep ; \ stosl /* @@ -214,7 +220,7 @@ _KPTphys: .long 0 /* phys addr of kernel page tables */ * prot = protection bits */ #define fillkpt(base, prot) \ - shll $(PTEINDX),%ebx ; \ + shll $(PTEINDX),%ebx ; \ addl base,%ebx ; \ orl $(PTE_V) ,%eax ; \ orl prot,%eax ; \ @@ -232,11 +238,11 @@ _KPTphys: .long 0 /* phys addr of kernel page tables */ #define fillkptphys(prot) \ movl %eax, %ebx ; \ shrl $(PAGE_SHIFT), %ebx ; \ - fillkpt(PA(EXT(KPTphys)), prot) + fillkpt(PA(KPTphys), prot) - /* - * All CPUs start here. + * BSP CPU start here. + * eax points to kernbootstruct * * Environment: * protected mode, no paging, flat 32-bit address space. 
@@ -244,50 +250,103 @@ _KPTphys: .long 0 /* phys addr of kernel page tables */ */ .text .align ALIGN - .globl EXT(pstart) .globl EXT(_start) + .globl EXT(_pstart) LEXT(_start) +LEXT(_pstart) + mov %ds, %bx + mov %bx, %es + mov %eax, %ebp // Move kernbootstruct to ebp + POSTCODE(_PSTART_ENTRY) + movl KADDR(%ebp), %ebx // Load boot image phys addr + movl %ebx, %edx // Set edx with boot load phys addr + addl KSIZE(%ebp), %edx // Add boot image size + addl $(NBPG-1), %edx // Round to a page size + andl $(-NBPG), %edx // Set edx to first free page + movl %edx, %esp // Set temporay stack + addl $(NBPG), %esp // add page size + call Ls1 +Ls1: popl %esi // Get return address + cmpl $(PA(Ls1)), %esi // Compare with static physicall addr + je EXT(pstart) // Branch if equal + subl $(PA(Ls1)), %esi // Extract relocation offset + movl %esi, %esp // Store relocation offset in esp + leal (PA(Lreloc_start))(%esp),%esi + // Set esi to reloc_start boot phys addr + movl %edx, %edi // Set edi to first free page + movl $(Lreloc_end-Lreloc_start), %ecx + // Set ecx to copy code size + cld // count up + rep + movsb // copy reloc copy code + wbinvd // Write back and Invalidate cache + movl %ebx, %esi // Set esi to kernbootstruct kaddr + movl KADDR(%ebp), %edi // Load boot image phys addr + subl %esp, %edi // Adjust to static phys addr + movl KSIZE(%ebp), %ecx // Set ecx to kernbootstruct ksize + addl $(NBPG-1), %ecx // Add NBPG-1 to ecx + andl $(-NBPG), %ecx // Truncate ecx to a page aligned addr + sarl $2, %ecx // Divide ecx by 4 + movl %esp, (PA(EXT(KernelRelocOffset)))(%esp) + // Store relocation offset + movl %edi, KADDR(%ebp) // Relocate kaddr in kernbootstruct + subl %esp, MEMORYMAP(%ebp) // And relocate MemoryMap + subl %esp, DEVICETREEP(%ebp) // And relocate deviceTreeP + subl %esp, %ebp // Set ebp with relocated phys addr + jmp *%edx // Branch to relocated copy code +Lreloc_start: + POSTCODE(_PSTART_RELOC) + rep + movsl // Copy boot image at BASE_KERNEL_PADDR + wbinvd // Write 
back and Invalidate cache + movl $(PA(EXT(pstart))), %edx // Set branch target + jmp *%edx // Far jmp to pstart phys addr +Lreloc_end: + /* NOTREACHED */ + hlt + + .text + .globl __start + .set __start, PA(EXT(_pstart)) + +/* + * BSP CPU continues here after possible relocation. + * ebp points to kernbootstruct + */ + .align ALIGN + .globl EXT(pstart) LEXT(pstart) - mov %eax, %ebx /* save pointer to kernbootstruct */ + mov %ebp, %ebx /* get pointer to kernbootstruct */ - POSTCODE(PSTART_ENTRY); + POSTCODE(PSTART_ENTRY) mov $0,%ax /* fs must be zeroed; */ mov %ax,%fs /* some bootstrappers don`t do this */ mov %ax,%gs - jmp 1f -0: cmpl $0,PA(EXT(start_lock)) - jne 0b -1: movb $1,%eax - xchgl %eax,PA(EXT(start_lock)) /* locked */ - testl %eax,%eax - jnz 0b - - cmpl $0,PA(EXT(master_is_up)) /* are we first? */ - jne EXT(slave_start) /* no -- system already up. */ - movl $1,PA(EXT(master_is_up)) /* others become slaves */ - jmp 3f -3: - /* * Get startup parameters. */ - - movl %ebx,PA(EXT(boot_args_start)) /* Save KERNBOOTSTRUCT */ - movl KADDR(%ebx), %eax addl KSIZE(%ebx), %eax addl $(NBPG-1),%eax andl $(-NBPG), %eax - movl %eax, PA(EXT(KERNend)) movl %eax, PA(physfree) cld /* allocate kernel page table pages */ ALLOCPAGES(NKPT) - movl %esi,PA(EXT(KPTphys)) + movl %esi,PA(KPTphys) +#ifdef X86_64 +/* allocate PML4 page */ + ALLOCPAGES(1) + movl %esi,EXT(IdlePML4) +/* allocate new 3rd level directory page */ + ALLOCPAGES(1) + movl %esi,EXT(IdlePDPT64) +#endif + #ifdef PAE /* allocate Page Table Directory Page */ ALLOCPAGES(1) @@ -309,19 +368,23 @@ LEXT(pstart) movl PA(EXT(IdlePDPT)), %eax movl $1, %ecx fillkptphys( $(PTE_W) ) + + movl PA(EXT(IdlePDPT64)), %eax + movl $1, %ecx + fillkptphys( $(PTE_W) ) #endif movl PA(EXT(IdlePTD)),%eax movl $(NPGPTD), %ecx fillkptphys( $(PTE_W) ) /* install a pde for temp double map of bottom of VA */ - movl PA(EXT(KPTphys)),%eax + movl PA(KPTphys),%eax xorl %ebx,%ebx movl $(NKPT), %ecx fillkpt(PA(EXT(IdlePTD)), $(PTE_W)) /* install pde's 
for page tables */ - movl PA(EXT(KPTphys)),%eax + movl PA(KPTphys),%eax movl $(KPTDI),%ebx movl $(NKPT),%ecx fillkpt(PA(EXT(IdlePTD)), $(PTE_W)) @@ -398,24 +461,24 @@ LEXT(pstart) loop 1b #endif - POSTCODE(PSTART_PAGE_TABLES); + POSTCODE(PSTART_PAGE_TABLES) /* * Fix initial descriptor tables. */ - lea PA(EXT(idt)),%esi /* fix IDT */ + lea PA(EXT(master_idt)),%esi /* fix IDT */ movl $(IDTSZ),%ecx movl $(PA(fix_idt_ret)),%ebx jmp fix_desc_common /* (cannot use stack) */ fix_idt_ret: - lea PA(EXT(gdt)),%esi /* fix GDT */ + lea PA(EXT(master_gdt)),%esi /* fix GDT */ movl $(GDTSZ),%ecx movl $(PA(fix_gdt_ret)),%ebx jmp fix_desc_common /* (cannot use stack) */ fix_gdt_ret: - lea PA(EXT(ldt)),%esi /* fix LDT */ + lea PA(EXT(master_ldt)),%esi /* fix LDT */ movl $(LDTSZ),%ecx movl $(PA(fix_ldt_ret)),%ebx jmp fix_desc_common /* (cannot use stack) */ @@ -428,7 +491,7 @@ fix_ldt_ret: lgdt PA(EXT(gdtptr)) /* load GDT */ lidt PA(EXT(idtptr)) /* load IDT */ - POSTCODE(PSTART_BEFORE_PAGING); + POSTCODE(PSTART_BEFORE_PAGING) /* * Turn on paging. @@ -438,8 +501,21 @@ fix_ldt_ret: movl %eax, %cr3 movl %cr4, %eax - orl $(CR4_PAE), %eax + orl $(CR4_PAE|CR4_PGE|CR4_MCE), %eax movl %eax, %cr4 + + movl $0x80000001, %eax + cpuid + and $(CPUID_EXTFEATURE_XD), %edx /* clear all but bit 20 */ + cmp $0, %edx /* skip setting NXE if 20 is not set */ + je 1f + + movl $(MSR_IA32_EFER), %ecx /* MSR number in ecx */ + rdmsr /* MSR value return in edx: eax */ + orl $(MSR_IA32_EFER_NXE), %eax /* Set NXE bit in low 32-bits */ + wrmsr /* Update Extended Feature Enable reg */ +1: + #else movl PA(EXT(IdlePTD)), %eax movl %eax,%cr3 @@ -452,7 +528,7 @@ fix_ldt_ret: LJMP(KERNEL_CS,EXT(vstart)) /* switch to kernel code segment */ /* - * Master is now running with correct addresses. + * BSP is now running with correct addresses. 
*/ LEXT(vstart) POSTCODE(VSTART_ENTRY) ; @@ -461,20 +537,24 @@ LEXT(vstart) mov %ax,%ds mov %ax,%es mov %ax,%ss - mov %ax,EXT(ktss)+TSS_SS0 /* set kernel stack segment */ + mov %ax,EXT(master_ktss)+TSS_SS0 /* set kernel stack segment */ /* for traps to kernel */ + #if MACH_KDB - mov %ax,EXT(dbtss)+TSS_SS0 /* likewise for debug task switch */ + mov %ax,EXT(master_dbtss)+TSS_SS0 /* likewise for debug task switch */ mov %cr3,%eax /* get PDBR into debug TSS */ - mov %eax,EXT(dbtss)+TSS_PDBR + mov %eax,EXT(master_dbtss)+TSS_PDBR mov $0,%eax #endif + mov %cr3,%eax /* get PDBR into DF TSS */ + mov %eax,EXT(master_dftss)+TSS_PDBR + mov %eax,EXT(master_mctss)+TSS_PDBR movw $(KERNEL_LDT),%ax /* get LDT segment */ lldt %ax /* load LDT */ #if MACH_KDB - mov %ax,EXT(ktss)+TSS_LDT /* store LDT in two TSS, as well... */ - mov %ax,EXT(dbtss)+TSS_LDT /* ...matters if we switch tasks */ + mov %ax,EXT(master_ktss)+TSS_LDT /* store LDT in two TSS, as well... */ + mov %ax,EXT(master_dbtss)+TSS_LDT /* ...matters if we switch tasks */ #endif movw $(KERNEL_TSS),%ax ltr %ax /* set up KTSS */ @@ -482,72 +562,56 @@ LEXT(vstart) mov $(CPU_DATA_GS),%ax mov %ax,%gs - POSTCODE(VSTART_STACK_SWITCH); + POSTCODE(VSTART_STACK_SWITCH) - lea EXT(eintstack),%esp /* switch to the bootup stack */ - call EXT(i386_preinit) + lea EXT(low_eintstack),%esp /* switch to the bootup stack */ + pushl %ebp /* push boot args addr */ + xorl %ebp,%ebp /* clear stack frame ptr */ - POSTCODE(VSTART_EXIT); + POSTCODE(VSTART_EXIT) call EXT(i386_init) /* run C code */ /*NOTREACHED*/ hlt - .text - .globl __start - .set __start, PA(EXT(pstart)) - - -/* - * master_up is used by the master cpu to signify that it is done - * with the interrupt stack, etc. See the code in pstart and svstart - * that this interlocks with. 
- */ - .align ALIGN - .globl EXT(master_up) -LEXT(master_up) - pushl %ebp /* set up */ - movl %esp,%ebp /* stack frame */ - movl $0,%ecx /* unlock start_lock */ - xchgl %ecx,EXT(start_lock) /* since we are no longer using */ - /* bootstrap stack */ - leave /* pop stack frame */ - ret /* - * We aren't the first. Call slave_main to initialize the processor - * and get Mach going on it. + * AP (slave) CPUs enter here. + * + * Environment: + * protected mode, no paging, flat 32-bit address space. + * (Code/data/stack segments have base == 0, limit == 4G) */ .align ALIGN - .globl EXT(slave_start) -LEXT(slave_start) + .globl EXT(slave_pstart) +LEXT(slave_pstart) cli /* disable interrupts, so we don`t */ /* need IDT for a while */ - POSTCODE(SLAVE_START_ENTRY); + POSTCODE(SLAVE_PSTART_ENTRY) /* * Turn on paging. */ - movl $(EXT(spag_start)),%edx /* first paged code address */ - #ifdef PAE - movl $(0x4000), %eax - movl %eax, %cr3 - movl %cr4, %eax - orl $(CR4_PAE), %eax + orl $(CR4_PAE|CR4_PGE|CR4_MCE), %eax movl %eax, %cr4 -#else + + movl $(MSR_IA32_EFER), %ecx /* MSR number in ecx */ + rdmsr /* MSR value return in edx: eax */ + orl $(MSR_IA32_EFER_NXE), %eax /* Set NXE bit in low 32-bits */ + wrmsr /* Update Extended Feature Enable reg */ +#endif movl $(0x4000),%eax /* tmp until we get mapped */ movl %eax,%cr3 -#endif movl %cr0,%eax orl $(CR0_PG|CR0_WP|CR0_PE),%eax movl %eax,%cr0 /* to enable paging */ - POSTCODE(SLAVE_START_EXIT); + POSTCODE(SLAVE_PSTART_EXIT) + movl $(EXT(spag_start)),%edx /* first paged code address */ jmp *%edx /* flush prefetch queue */ /* @@ -558,15 +622,15 @@ LEXT(spag_start) lgdt PA(EXT(gdtptr)) /* load GDT */ lidt PA(EXT(idtptr)) /* load IDT */ - LJMP(KERNEL_CS,EXT(svstart)) /* switch to kernel code segment */ + LJMP(KERNEL_CS,EXT(slave_vstart)) /* switch to kernel code segment */ /* * Slave is now running with correct addresses. 
*/ -LEXT(svstart) +LEXT(slave_vstart) - POSTCODE(SVSTART_ENTRY); + POSTCODE(SLAVE_VSTART_ENTRY) #ifdef PAE movl PA(EXT(IdlePDPT)), %eax @@ -592,19 +656,18 @@ LEXT(svstart) /* * Switch to the per-cpu descriptor tables */ - POSTCODE(SVSTART_DESC_INIT); + POSTCODE(SLAVE_VSTART_DESC_INIT) CPU_NUMBER_FROM_LAPIC(%eax) movl CX(EXT(cpu_data_ptr),%eax),%ecx - movl CPU_DESC_TABLEP(%ecx), %ecx movw $(GDTSZ*8-1),0(%esp) /* set GDT size in GDT descriptor */ - leal MP_GDT(%ecx),%edx + movl CPU_DESC_INDEX+CDI_GDT(%ecx),%edx movl %edx,2(%esp) /* point to local GDT (linear addr) */ lgdt 0(%esp) /* load new GDT */ movw $(IDTSZ*8-1),0(%esp) /* set IDT size in IDT descriptor */ - leal MP_IDT(%ecx),%edx + movl CPU_DESC_INDEX+CDI_IDT(%ecx),%edx movl %edx,2(%esp) /* point to local IDT (linear addr) */ lidt 0(%esp) /* load new IDT */ @@ -620,15 +683,12 @@ LEXT(svstart) /* * Get stack top from pre-cpu data and switch */ - POSTCODE(SVSTART_STACK_SWITCH); + POSTCODE(SLAVE_VSTART_STACK_SWITCH) movl %gs:CPU_INT_STACK_TOP,%esp xorl %ebp,%ebp /* for completeness */ - movl $0,%eax /* unlock start_lock */ - xchgl %eax,EXT(start_lock) /* since we are no longer using */ - /* bootstrap stack */ - POSTCODE(SVSTART_EXIT); + POSTCODE(SLAVE_VSTART_EXIT) call EXT(i386_init_slave) /* start MACH */ /*NOTREACHED*/ diff --git a/osfmk/i386/start64.s b/osfmk/i386/start64.s new file mode 100644 index 000000000..25b70d602 --- /dev/null +++ b/osfmk/i386/start64.s @@ -0,0 +1,176 @@ +/* + * Copyright (c) 2006 Apple Computer, Inc. All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * The contents of this file constitute Original Code as defined in and + * are subject to the Apple Public Source License Version 1.1 (the + * "License"). You may not use this file except in compliance with the + * License. Please obtain a copy of the License at + * http://www.apple.com/publicsource and read it before using this file. 
+ * + * This Original Code and all software distributed under the License are + * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the + * License for the specific language governing rights and limitations + * under the License. + * + * @APPLE_LICENSE_HEADER_END@ + */ + +#include +#include + +#include +#include +#include +#include +#include + + .data + .align 3 + .globl EXT(gdtptr64) + /* align below right */ + .word 0 +LEXT(gdtptr64) + .word Times(8,GDTSZ)-1 + /* XXX really want .quad here */ + .long EXT(master_gdt) + .long KERNEL_UBER_BASE_HI32 /* must be in uber-space */ + + .align 3 + .globl EXT(idtptr64) + /* align below right */ + .word 0 +LEXT(idtptr64) + .word Times(16,IDTSZ)-1 + /* XXX really want .quad here */ + .long EXT(master_idt64) + .long KERNEL_UBER_BASE_HI32 /* must be in uber-space */ + + .text + +Entry(ml_load_desc64) + + ENTER_64BIT_MODE() + + POSTCODE(ML_LOAD_DESC64_ENTRY) + + lgdt EXT(gdtptr64) /* load GDT */ + + POSTCODE(ML_LOAD_DESC64_GDT) + + lidt EXT(idtptr64) /* load IDT */ + + POSTCODE(ML_LOAD_DESC64_IDT) + + movw $(KERNEL_LDT),%ax /* get LDT segment */ + lldt %ax /* load LDT */ + + POSTCODE(ML_LOAD_DESC64_LDT) + + movw $(KERNEL_TSS),%ax + ltr %ax /* set up KTSS */ + + POSTCODE(ML_LOAD_DESC64_EXIT) + + ENTER_COMPAT_MODE() + + ret + + +Entry(ml_64bit_wrmsr64) + /* (uint32_t msr, uint64_t value) */ + /* (uint32_t msr, uint32_t lo, uint32_t hi) */ + + FRAME + + ENTER_64BIT_MODE() + + movl B_ARG0, %ecx + movl B_ARG1, %eax + movl B_ARG2, %edx + wrmsr + + ENTER_COMPAT_MODE() + + EMARF + ret + + +Entry(ml_64bit_lldt) + /* (int32_t selector) */ + + FRAME + + ENTER_64BIT_MODE() + + movl B_ARG0, %eax + lldt %ax + + ENTER_COMPAT_MODE() + + EMARF + ret + +Entry(set_64bit_debug_regs) + /* x86_debug_state64_t *ds */ + + 
FRAME + + ENTER_64BIT_MODE() + + mov B_ARG0, %edx + mov DS64_DR0(%edx), %rax + mov %rax, %dr0 + mov DS64_DR1(%edx), %rax + mov %rax, %dr1 + mov DS64_DR2(%edx), %rax + mov %rax, %dr2 + mov DS64_DR3(%edx), %rax + mov %rax, %dr3 + + ENTER_COMPAT_MODE() + + EMARF + ret + +Entry(flush_tlb64) + + FRAME + + ENTER_64BIT_MODE() + + mov %cr3, %rax + mov %rax, %cr3 + + ENTER_COMPAT_MODE() + + EMARF + ret + +/* FXSAVE and FXRSTOR operate in a mode dependent fashion, hence these variants. +* Must be called with interrupts disabled. +* We clear pending x87 exceptions here; this is technically incorrect, since we should +* propagate those to the user, but the compatibility mode kernel is currently not +* prepared to handle exceptions originating in 64-bit kernel mode. However, it may be possible +* to work around this should it prove necessary. +*/ + +Entry(fxsave64) + movl S_ARG0,%eax + ENTER_64BIT_MODE() + fnclex + fxsave 0(%eax) + ENTER_COMPAT_MODE() + ret + +Entry(fxrstor64) + movl S_ARG0,%eax + ENTER_64BIT_MODE() + fnclex + fxrstor 0(%rax) + ENTER_COMPAT_MODE() + ret diff --git a/osfmk/i386/startup64.c b/osfmk/i386/startup64.c new file mode 100644 index 000000000..0d746139d --- /dev/null +++ b/osfmk/i386/startup64.c @@ -0,0 +1,330 @@ +/* + * Copyright (c) 2006 Apple Computer, Inc. All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * The contents of this file constitute Original Code as defined in and + * are subject to the Apple Public Source License Version 1.1 (the + * "License"). You may not use this file except in compliance with the + * License. Please obtain a copy of the License at + * http://www.apple.com/publicsource and read it before using this file. 
+ * + * This Original Code and all software distributed under the License are + * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the + * License for the specific language governing rights and limitations + * under the License. + * + * @APPLE_LICENSE_HEADER_END@ + */ + +#include + +#include + +#include +#include +#include + +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include /* prototyping */ +#include + +#include +#include +#include +#include +#include +#include +#include + +#include + +#include + +#include + +void +cpu_IA32e_enable(cpu_data_t *cdp) +{ + uint32_t cr0 = get_cr0(); + uint64_t efer = rdmsr64(MSR_IA32_EFER); + + assert(!ml_get_interrupts_enabled()); + + postcode(CPU_IA32_ENABLE_ENTRY); + + /* Turn paging off - works because we're identity mapped */ + set_cr0(cr0 & ~CR0_PG); + + /* pop in new top level phys pg addr */ + set_cr3((vm_offset_t) kernel64_cr3); + + wrmsr64(MSR_IA32_EFER, efer | MSR_IA32_EFER_LME); /* set mode */ + + /* Turn paging on */ + set_cr0(cr0 | CR0_PG); + + /* this call is required to re-activate paging */ + kprintf("cpu_IA32e_enable(%p)\n", cdp); + + if ((rdmsr64(MSR_IA32_EFER) & MSR_IA32_EFER_LMA) == 0) + panic("cpu_IA32e_enable() MSR_IA32_EFER_LMA not asserted"); + + cdp->cpu_kernel_cr3 = kernel64_cr3; + + postcode(CPU_IA32_ENABLE_EXIT); +} + +void +cpu_IA32e_disable(cpu_data_t *cdp) +{ + uint32_t cr0 = get_cr0(); + uint64_t efer = rdmsr64(MSR_IA32_EFER); + + assert(!ml_get_interrupts_enabled()); + + postcode(CPU_IA32_DISABLE_ENTRY); + + if ((rdmsr64(MSR_IA32_EFER) & MSR_IA32_EFER_LMA) == 0) + panic("cpu_IA32e_disable() MSR_IA32_EFER_LMA clear on entry"); + + /* Turn paging off - works because we're identity mapped */ + 
set_cr0(cr0 & ~CR0_PG); + + /* pop in legacy top level phys pg addr */ + set_cr3((vm_offset_t) lo_kernel_cr3); + + wrmsr64(MSR_IA32_EFER, efer & ~MSR_IA32_EFER_LME); /* reset mode */ + + /* Turn paging on */ + set_cr0(cr0 | CR0_PG); + + /* this call is required to re-activate paging */ + kprintf("cpu_IA32e_disable(%p)\n", cdp); + + if ((rdmsr64(MSR_IA32_EFER) & MSR_IA32_EFER_LMA) != 0) + panic("cpu_IA32e_disable() MSR_IA32_EFER_LMA not cleared"); + + cdp->cpu_kernel_cr3 = 0ULL; + + postcode(CPU_IA32_DISABLE_EXIT); +} + +void +fix_desc64(void *descp, int count) +{ + struct fake_descriptor64 *fakep; + union { + struct real_gate64 gate; + struct real_descriptor64 desc; + } real; + int i; + + fakep = (struct fake_descriptor64 *) descp; + + for (i = 0; i < count; i++, fakep++) { + /* + * Construct the real decriptor locally. + */ + + bzero((void *) &real, sizeof(real)); + + switch (fakep->access & ACC_TYPE) { + case 0: + break; + case ACC_CALL_GATE: + case ACC_INTR_GATE: + case ACC_TRAP_GATE: + real.gate.offset_low16 = fakep->offset[0] & 0xFFFF; + real.gate.selector16 = fakep->lim_or_seg & 0xFFFF; + real.gate.IST = fakep->size_or_IST & 0x7; + real.gate.access8 = fakep->access; + real.gate.offset_high16 = (fakep->offset[0]>>16)&0xFFFF; + real.gate.offset_top32 = (uint32_t)fakep->offset[1]; + break; + default: /* Otherwise */ + real.desc.limit_low16 = fakep->lim_or_seg & 0xFFFF; + real.desc.base_low16 = fakep->offset[0] & 0xFFFF; + real.desc.base_med8 = (fakep->offset[0] >> 16) & 0xFF; + real.desc.access8 = fakep->access; + real.desc.limit_high4 = (fakep->lim_or_seg >> 16) & 0xFF; + real.desc.granularity4 = fakep->size_or_IST; + real.desc.base_high8 = (fakep->offset[0] >> 24) & 0xFF; + real.desc.base_top32 = (uint32_t) fakep->offset[1]; + } + + /* + * Now copy back over the fake structure. 
+ */ + bcopy((void *) &real, (void *) fakep, sizeof(real)); + } +} + +#if DEBUG +extern void dump_gdt(void *); +extern void dump_ldt(void *); +extern void dump_idt(void *); +extern void dump_tss(void *); +extern void dump_frame32(x86_saved_state_compat32_t *scp); +extern void dump_frame64(x86_saved_state64_t *scp); + +void +dump_frame32(x86_saved_state_compat32_t *scp) +{ + unsigned int i; + uint32_t *ip = (uint32_t *) scp; + + kprintf("dump_frame32(0x%08x):\n", scp); + + for (i = 0; + i < sizeof(x86_saved_state_compat32_t)/sizeof(uint32_t); + i++, ip++) + kprintf("0x%08x: 0x%08x\n", ip, *ip); + + kprintf("scp->isf64.err: 0x%016llx\n", scp->isf64.err); + kprintf("scp->isf64.rip: 0x%016llx\n", scp->isf64.rip); + kprintf("scp->isf64.cs: 0x%016llx\n", scp->isf64.cs); + kprintf("scp->isf64.rflags: 0x%016llx\n", scp->isf64.rflags); + kprintf("scp->isf64.rsp: 0x%016llx\n", scp->isf64.rsp); + kprintf("scp->isf64.ss: 0x%016llx\n", scp->isf64.ss); + + kprintf("scp->iss32.tag: 0x%08x\n", scp->iss32.tag); + kprintf("scp->iss32.state.gs: 0x%08x\n", scp->iss32.state.gs); + kprintf("scp->iss32.state.fs: 0x%08x\n", scp->iss32.state.fs); + kprintf("scp->iss32.state.es: 0x%08x\n", scp->iss32.state.es); + kprintf("scp->iss32.state.ds: 0x%08x\n", scp->iss32.state.ds); + kprintf("scp->iss32.state.edi: 0x%08x\n", scp->iss32.state.edi); + kprintf("scp->iss32.state.esi: 0x%08x\n", scp->iss32.state.esi); + kprintf("scp->iss32.state.ebp: 0x%08x\n", scp->iss32.state.ebp); + kprintf("scp->iss32.state.cr2: 0x%08x\n", scp->iss32.state.cr2); + kprintf("scp->iss32.state.ebx: 0x%08x\n", scp->iss32.state.ebx); + kprintf("scp->iss32.state.edx: 0x%08x\n", scp->iss32.state.edx); + kprintf("scp->iss32.state.ecx: 0x%08x\n", scp->iss32.state.ecx); + kprintf("scp->iss32.state.eax: 0x%08x\n", scp->iss32.state.eax); + kprintf("scp->iss32.state.trapno: 0x%08x\n", scp->iss32.state.eax); + kprintf("scp->iss32.state.eip: 0x%08x\n", scp->iss32.state.eip); + kprintf("scp->iss32.state.cs: 0x%08x\n", 
scp->iss32.state.cs); + kprintf("scp->iss32.state.efl: 0x%08x\n", scp->iss32.state.efl); + kprintf("scp->iss32.state.uesp: 0x%08x\n", scp->iss32.state.uesp); + kprintf("scp->iss32.state.ss: 0x%08x\n", scp->iss32.state.ss); + + postcode(0x99); +} + +void +dump_frame64(x86_saved_state64_t *scp) +{ + unsigned int i; + uint64_t *ip = (uint64_t *) scp; + + kprintf("dump_frame64(0x%08x):\n", scp); + + for (i = 0; + i < sizeof(x86_saved_state64_t)/sizeof(uint64_t); + i++, ip++) + kprintf("0x%08x: 0x%016x\n", ip, *ip); + + kprintf("scp->isf.trapno: 0x%08x\n", scp->isf.trapno); + kprintf("scp->isf.trapfn: 0x%08x\n", scp->isf.trapfn); + kprintf("scp->isf.err: 0x%016llx\n", scp->isf.err); + kprintf("scp->isf.rip: 0x%016llx\n", scp->isf.rip); + kprintf("scp->isf.cs: 0x%016llx\n", scp->isf.cs); + kprintf("scp->isf.rflags: 0x%016llx\n", scp->isf.rflags); + kprintf("scp->isf.rsp: 0x%016llx\n", scp->isf.rsp); + kprintf("scp->isf.ss: 0x%016llx\n", scp->isf.ss); + + kprintf("scp->fs: 0x%016llx\n", scp->fs); + kprintf("scp->gs: 0x%016llx\n", scp->gs); + kprintf("scp->rax: 0x%016llx\n", scp->rax); + kprintf("scp->rcx: 0x%016llx\n", scp->rcx); + kprintf("scp->rbx: 0x%016llx\n", scp->rbx); + kprintf("scp->rbp: 0x%016llx\n", scp->rbp); + kprintf("scp->r11: 0x%016llx\n", scp->r11); + kprintf("scp->r12: 0x%016llx\n", scp->r12); + kprintf("scp->r13: 0x%016llx\n", scp->r13); + kprintf("scp->r14: 0x%016llx\n", scp->r14); + kprintf("scp->r15: 0x%016llx\n", scp->r15); + kprintf("scp->cr2: 0x%016llx\n", scp->cr2); + kprintf("scp->v_arg8: 0x%016llx\n", scp->v_arg8); + kprintf("scp->v_arg7: 0x%016llx\n", scp->v_arg7); + kprintf("scp->v_arg6: 0x%016llx\n", scp->v_arg6); + kprintf("scp->r9: 0x%016llx\n", scp->r9); + kprintf("scp->r8: 0x%016llx\n", scp->r8); + kprintf("scp->r10: 0x%016llx\n", scp->r10); + kprintf("scp->rdx: 0x%016llx\n", scp->rdx); + kprintf("scp->rsi: 0x%016llx\n", scp->rsi); + kprintf("scp->rdi: 0x%016llx\n", scp->rdi); + + postcode(0x98); +} + +void +dump_gdt(void *gdtp) +{ + 
unsigned int i; + uint32_t *ip = (uint32_t *) gdtp; + + kprintf("GDT:\n", ip); + for (i = 0; i < GDTSZ; i++, ip += 2) { + kprintf("%p: 0x%08x\n", ip+0, *(ip+0)); + kprintf("%p: 0x%08x\n", ip+1, *(ip+1)); + } +} + +void +dump_ldt(void *ldtp) +{ + unsigned int i; + uint32_t *ip = (uint32_t *) ldtp; + + kprintf("LDT:\n", ip); + for (i = 0; i < LDTSZ_MIN; i++, ip += 2) { + kprintf("%p: 0x%08x\n", ip+0, *(ip+0)); + kprintf("%p: 0x%08x\n", ip+1, *(ip+1)); + } +} + +void +dump_idt(void *idtp) +{ + unsigned int i; + uint32_t *ip = (uint32_t *) idtp; + + kprintf("IDT64:\n", ip); + for (i = 0; i < 16; i++, ip += 4) { + kprintf("%p: 0x%08x\n", ip+0, *(ip+0)); + kprintf("%p: 0x%08x\n", ip+1, *(ip+1)); + kprintf("%p: 0x%08x\n", ip+2, *(ip+2)); + kprintf("%p: 0x%08x\n", ip+3, *(ip+3)); + } +} + +void +dump_tss(void *tssp) +{ + unsigned int i; + uint32_t *ip = (uint32_t *) tssp; + + kprintf("TSS64:\n", ip); + for (i = 0; i < sizeof(master_ktss64)/sizeof(uint32_t); i++, ip++) { + kprintf("%p: 0x%08x\n", ip+0, *(ip+0)); + } +} +#endif /* DEBUG */ diff --git a/osfmk/i386/task.h b/osfmk/i386/task.h index 931b12a2a..c15ac0963 100644 --- a/osfmk/i386/task.h +++ b/osfmk/i386/task.h @@ -49,8 +49,7 @@ */ /* - * No machine dependant task fields + * Machine dependant task fields */ -#define MACHINE_TASK - +#define MACHINE_TASK struct user_ldt * i386_ldt; diff --git a/osfmk/i386/thread.h b/osfmk/i386/thread.h index d3f5f1d5d..d23a9d087 100644 --- a/osfmk/i386/thread.h +++ b/osfmk/i386/thread.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2006 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -83,119 +83,77 @@ * For performance, it is also used directly in syscall exceptions * if the server has requested i386_THREAD_STATE flavor for the exception * port. - * - * We define the following as an alias for the "esp" field of the - * structure, because we actually save cr2 here, not the kernel esp. 
*/ -#define cr2 esp /* * Save area for user floating-point state. * Allocated only when necessary. */ -struct i386_fpsave_state { +struct x86_fpsave_state { boolean_t fp_valid; - struct i386_fp_save fp_save_state; - struct i386_fp_regs fp_regs; - struct i386_fx_save fx_save_state __attribute__ ((aligned (16))); - int fp_save_flavor; -}; - -/* - * v86_assist_state: - * - * This structure provides data to simulate 8086 mode - * interrupts. It lives in the pcb. - */ - -struct v86_assist_state { - vm_offset_t int_table; - unsigned short int_count; - unsigned short flags; /* 8086 flag bits */ + enum { + FXSAVE32 = 1, + FXSAVE64 = 2 + } fp_save_layout; + struct x86_fx_save fx_save_state __attribute__ ((aligned (16))); }; -#define V86_IF_PENDING 0x8000 /* unused bit */ - -/* - * i386_interrupt_state: - * - * This structure describes the set of registers that must - * be pushed on the current ring-0 stack by an interrupt before - * we can switch to the interrupt stack. - */ -struct i386_interrupt_state { - int gs; - int fs; - int es; - int ds; - int edx; - int ecx; - int eax; - int eip; - int cs; - int efl; -}; /* - * i386_kernel_state: + * x86_kernel_state32: * * This structure corresponds to the state of kernel registers * as saved in a context-switch. It lives at the base of the stack. + * kernel only runs in 32 bit mode for now */ -struct i386_kernel_state { +struct x86_kernel_state32 { int k_ebx; /* kernel context */ int k_esp; int k_ebp; int k_edi; int k_esi; int k_eip; + /* + * Kernel stacks are 16-byte aligned with a 4-byte i386_exception_link at + * the top, followed by an x86_kernel_state32. After both structs have + * been pushed, we want to be 16-byte aligned. A dummy int gets us there. + */ + int dummy; }; -/* - * i386_machine_state: - * - * This structure corresponds to special machine state. - * It lives in the pcb. It is not saved by default. 
- */ - -struct i386_machine_state { - iopb_tss_t io_tss; - struct user_ldt * ldt; - struct i386_fpsave_state *ifps; - struct v86_assist_state v86s; -}; typedef struct pcb { - struct i386_interrupt_state iis[2]; /* interrupt and NMI */ - struct i386_saved_state iss; - struct i386_machine_state ims; + void *sf; + x86_saved_state_t *iss; + struct x86_fpsave_state *ifps; #ifdef MACH_BSD - unsigned long cthread_self; /* for use of cthread package */ + uint64_t cthread_self; /* for use of cthread package */ struct real_descriptor cthread_desc; - unsigned long uldt_selector; /* user ldt selector to set */ - struct real_descriptor uldt_desc; /* the actual user setable ldt data */ + unsigned long uldt_selector; /* user ldt selector to set */ + struct real_descriptor uldt_desc; /* the actual user setable ldt data */ #endif - decl_simple_lock_data(,lock) + decl_simple_lock_data(,lock); + uint64_t iss_pte0; + uint64_t iss_pte1; + void *ids; } *pcb_t; + /* * Maps state flavor to number of words in the state: */ __private_extern__ unsigned int _MachineStateCount[]; -#define USER_REGS(ThrAct) (&(ThrAct)->machine.pcb->iss) - -#define act_machine_state_ptr(ThrAct) (thread_state_t)USER_REGS(ThrAct) +#define USER_STATE(ThrAct) ((ThrAct)->machine.pcb->iss) +#define USER_REGS32(ThrAct) (saved_state32(USER_STATE(ThrAct))) +#define USER_REGS64(ThrAct) (saved_state64(USER_STATE(ThrAct))) +#define user_pc(ThrAct) (is_saved_state32(USER_STATE(ThrAct)) ? 
\ + USER_REGS32(ThrAct)->eip : \ + USER_REGS64(ThrAct)->isf.rip ) -#define is_user_thread(ThrAct) \ - ((USER_REGS(ThrAct)->efl & EFL_VM) \ - || ((USER_REGS(ThrAct)->cs & 0x03) != 0)) - -#define user_pc(ThrAct) (USER_REGS(ThrAct)->eip) -#define user_sp(ThrAct) (USER_REGS(ThrAct)->uesp) struct machine_thread { /* @@ -205,9 +163,24 @@ struct machine_thread { struct pcb xxx_pcb; pcb_t pcb; + uint32_t specFlags; +#define OnProc 0x1 + + struct { + user_addr_t user_base; + } copy_window[NCOPY_WINDOWS]; + int nxt_window; + int copyio_state; +#define WINDOWS_DIRTY 0 +#define WINDOWS_CLEAN 1 +#define WINDOWS_CLOSED 2 +#define WINDOWS_OPENED 3 + uint64_t physwindow_pte; + int physwindow_busy; }; -extern struct i386_saved_state *get_user_regs(thread_t); + +extern void *get_user_regs(thread_t); extern void *act_thread_csave(void); extern void act_thread_catt(void *ctx); @@ -220,7 +193,7 @@ extern void act_thread_cfree(void *ctx); * It points to the current thread`s user registers. */ struct i386_exception_link { - struct i386_saved_state *saved_state; + x86_saved_state_t *saved_state; }; @@ -233,7 +206,7 @@ struct i386_exception_link { */ #define STACK_IKS(stack) \ - ((struct i386_kernel_state *)((stack) + KERNEL_STACK_SIZE) - 1) + ((struct x86_kernel_state32 *)((stack) + KERNEL_STACK_SIZE) - 1) #define STACK_IEL(stack) \ ((struct i386_exception_link *)STACK_IKS(stack) - 1) diff --git a/osfmk/i386/trap.c b/osfmk/i386/trap.c index e8b8a2a20..bb3952249 100644 --- a/osfmk/i386/trap.c +++ b/osfmk/i386/trap.c @@ -63,6 +63,7 @@ #include #include #include +#include /* inb() */ #include #include @@ -82,17 +83,14 @@ #include #include +#include + #if MACH_KGDB #include #endif /* MACH_KGDB */ -#include - -#if MACH_KGDB -#include -#endif /* MACH_KGDB */ - #if MACH_KDB +#include #include #include #include @@ -102,31 +100,55 @@ #include #include +#include +#include +#include +#include /* * Forward declarations */ -extern void user_page_fault_continue( - kern_return_t kr); +static void 
user_page_fault_continue(kern_return_t kret); +static void panic_trap(x86_saved_state32_t *saved_state); +static void set_recovery_ip(x86_saved_state32_t *saved_state, vm_offset_t ip); -extern boolean_t v86_assist( - thread_t thread, - struct i386_saved_state *regs); - -extern boolean_t check_io_fault( - struct i386_saved_state *regs); - -extern int inst_fetch( - int eip, - int cs); +perfCallback perfTrapHook = NULL; /* Pointer to CHUD trap hook routine */ +perfCallback perfASTHook = NULL; /* Pointer to CHUD AST hook routine */ void thread_syscall_return( kern_return_t ret) { - register thread_t thr_act = current_thread(); - register struct i386_saved_state *regs = USER_REGS(thr_act); - regs->eax = ret; + thread_t thr_act = current_thread(); + + if (thread_is_64bit(thr_act)) { + x86_saved_state64_t *regs; + + regs = USER_REGS64(thr_act); + + if (kdebug_enable && ((regs->rax & SYSCALL_CLASS_MASK) == (SYSCALL_CLASS_MACH << SYSCALL_CLASS_SHIFT))) { + /* Mach trap */ + KERNEL_DEBUG_CONSTANT( + MACHDBG_CODE(DBG_MACH_EXCP_SC, ((int) (regs->rax & SYSCALL_NUMBER_MASK))) + | DBG_FUNC_END, + ret, 0, 0, 0, 0); + } + regs->rax = ret; + + } else { + x86_saved_state32_t *regs; + + regs = USER_REGS32(thr_act); + + if (kdebug_enable && ((int) regs->eax < 0)) { + /* Mach trap */ + KERNEL_DEBUG_CONSTANT( + MACHDBG_CODE(DBG_MACH_EXCP_SC, -((int) regs->eax)) + | DBG_FUNC_END, + ret, 0, 0, 0, 0); + } + regs->eax = ret; + } thread_exception_return(); /*NOTREACHED*/ } @@ -141,27 +163,71 @@ extern boolean_t db_breakpoints_inserted; void thread_kdb_return(void) { - register thread_t thread = current_thread(); - register struct i386_saved_state *regs = USER_REGS(thread); + thread_t thr_act = current_thread(); + x86_saved_state_t *iss = USER_STATE(thr_act); - if (kdb_trap(regs->trapno, regs->err, regs)) { -#if MACH_LDEBUG - assert(thread->mutex_count == 0); -#endif /* MACH_LDEBUG */ - thread_exception_return(); - /*NOTREACHED*/ + if (is_saved_state64(iss)) { + x86_saved_state64_t *regs; + + 
regs = saved_state64(iss); + + if (kdb_trap(regs->isf.trapno, (int)regs->isf.err, (void *)regs)) { + thread_exception_return(); + /*NOTREACHED*/ + } + + } else { + x86_saved_state32_t *regs; + + regs = saved_state32(iss); + + if (kdb_trap(regs->trapno, regs->err, (void *)regs)) { + thread_exception_return(); + /*NOTREACHED*/ + } } } -boolean_t let_ddb_vm_fault = FALSE; #endif /* MACH_KDB */ void user_page_fault_continue( - kern_return_t kr) + kern_return_t kr) { - register thread_t thread = current_thread(); - register struct i386_saved_state *regs = USER_REGS(thread); + thread_t thread = current_thread(); + x86_saved_state_t *regs = USER_STATE(thread); + ast_t *myast; + boolean_t intr; + user_addr_t vaddr; +#if MACH_KDB + int err; + int trapno; +#endif + + assert((is_saved_state32(regs) && !thread_is_64bit(thread)) || + (is_saved_state64(regs) && thread_is_64bit(thread))); + + if (thread_is_64bit(thread)) { + x86_saved_state64_t *uregs; + + uregs = USER_REGS64(thread); + +#if MACH_KDB + trapno = uregs->isf.trapno; + err = uregs->isf.err; +#endif + vaddr = (user_addr_t)uregs->cr2; + } else { + x86_saved_state32_t *uregs; + + uregs = USER_REGS32(thread); + +#if MACH_KDB + trapno = uregs->trapno; + err = uregs->err; +#endif + vaddr = uregs->cr2; + } if ((kr == KERN_SUCCESS) || (kr == KERN_ABORTED)) { #if MACH_KDB @@ -170,28 +236,34 @@ user_page_fault_continue( } if (db_watchpoint_list && db_watchpoints_inserted && - (regs->err & T_PF_WRITE) && + (err & T_PF_WRITE) && db_find_watchpoint(thread->map, - (vm_offset_t)regs->cr2, + (vm_offset_t)vaddr, regs)) kdb_trap(T_WATCHPOINT, 0, regs); #endif /* MACH_KDB */ + intr = ml_set_interrupts_enabled(FALSE); + myast = ast_pending(); + while (*myast & AST_ALL) { + ast_taken(AST_ALL, intr); + ml_set_interrupts_enabled(FALSE); + myast = ast_pending(); + } + ml_set_interrupts_enabled(intr); + thread_exception_return(); /*NOTREACHED*/ } #if MACH_KDB if (debug_all_traps_with_kdb && - kdb_trap(regs->trapno, regs->err, regs)) { -#if 
MACH_LDEBUG - assert(thread->mutex_count == 0); -#endif /* MACH_LDEBUG */ + kdb_trap(trapno, err, regs)) { thread_exception_return(); /*NOTREACHED*/ } #endif /* MACH_KDB */ - i386_exception(EXC_BAD_ACCESS, kr, regs->cr2); + i386_exception(EXC_BAD_ACCESS, kr, vaddr); /*NOTREACHED*/ } @@ -206,56 +278,165 @@ struct recovery { extern struct recovery recover_table[]; extern struct recovery recover_table_end[]; -/* - * Recovery from Successful fault in copyout does not - * return directly - it retries the pte check, since - * the 386 ignores write protection in kernel mode. - */ -extern struct recovery retry_table[]; -extern struct recovery retry_table_end[]; - -const char * trap_type[] = {TRAP_NAMES}; -int TRAP_TYPES = sizeof(trap_type)/sizeof(trap_type[0]); +const char * trap_type[] = {TRAP_NAMES}; +unsigned TRAP_TYPES = sizeof(trap_type)/sizeof(trap_type[0]); +extern unsigned panic_io_port; +static inline void +reset_dr7(void) +{ + uint32_t dr7 = 0x400; /* magic dr7 reset value */ + __asm__ volatile("movl %0,%%dr7" : : "r" (dr7)); +} +#if MACH_KDP +unsigned kdp_has_active_watchpoints = 0; +#endif /* * Trap from kernel mode. Only page-fault errors are recoverable, * and then only in special circumstances. All other errors are * fatal. Return value indicates if trap was handled. 
*/ -boolean_t +void kernel_trap( - register struct i386_saved_state *regs) + x86_saved_state_t *state) { + x86_saved_state32_t *saved_state; int code; - unsigned int subcode; - int interruptible = THREAD_UNINT; - register int type; + user_addr_t vaddr; + int type; vm_map_t map; kern_return_t result = KERN_FAILURE; - register thread_t thread; + thread_t thread; + ast_t *myast; + boolean_t intr; + vm_prot_t prot; + struct recovery *rp; + vm_offset_t kern_ip; + int fault_in_copy_window = -1; + int is_user = 0; +#if MACH_KDB + pt_entry_t *pte; +#endif /* MACH_KDB */ - type = regs->trapno; - code = regs->err; thread = current_thread(); + if (is_saved_state64(state)) + panic("kernel_trap(%p) with 64-bit state", state); + saved_state = saved_state32(state); + + vaddr = (user_addr_t)saved_state->cr2; + type = saved_state->trapno; + code = saved_state->err & 0xffff; + intr = (saved_state->efl & EFL_IF) != 0; /* state of ints at trap */ + + kern_ip = (vm_offset_t)saved_state->eip; + + myast = ast_pending(); + + if (perfASTHook) { + if (*myast & AST_CHUD_ALL) + perfASTHook(type, NULL, 0, 0); + } else + *myast &= ~AST_CHUD_ALL; + + /* + * Is there a hook? + */ + if (perfTrapHook) { + if (perfTrapHook(type, NULL, 0, 0) == KERN_SUCCESS) { + /* + * If it succeeds, we are done... + */ + return; + } + } + /* + * we come here with interrupts off as we don't want to recurse + * on preemption below. 
but we do want to re-enable interrupts + * as soon we possibly can to hold latency down + */ + if (T_PREEMPT == type) { + + KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_EXCP_KTRAP_x86, type)) | DBG_FUNC_NONE, + 0, 0, 0, kern_ip, 0); + + ast_taken(AST_PREEMPTION, FALSE); + return; + } + + if (T_PAGE_FAULT == type) { + /* + * assume we're faulting in the kernel map + */ + map = kernel_map; + + if (thread != THREAD_NULL && thread->map != kernel_map) { + vm_offset_t copy_window_base; + vm_offset_t kvaddr; + int window_index; + + kvaddr = (vm_offset_t)vaddr; + /* + * must determine if fault occurred in + * the copy window while pre-emption is + * disabled for this processor so that + * we only need to look at the window + * associated with this processor + */ + copy_window_base = current_cpu_datap()->cpu_copywindow_base; + + if (kvaddr >= copy_window_base && kvaddr < (copy_window_base + (NBPDE * NCOPY_WINDOWS)) ) { + + window_index = (kvaddr - copy_window_base) / NBPDE; + + if (thread->machine.copy_window[window_index].user_base != (user_addr_t)-1) { + + kvaddr -= (copy_window_base + (NBPDE * window_index)); + vaddr = thread->machine.copy_window[window_index].user_base + kvaddr; + + map = thread->map; + fault_in_copy_window = window_index; + } + is_user = -1; + } + } + } + KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_EXCP_KTRAP_x86, type)) | DBG_FUNC_NONE, + (int)(vaddr >> 32), (int)vaddr, is_user, kern_ip, 0); + + + (void) ml_set_interrupts_enabled(intr); + switch (type) { - case T_PREEMPT: - ast_taken(AST_PREEMPTION, FALSE); - return (TRUE); case T_NO_FPU: fpnoextflt(); - return (TRUE); + return; case T_FPU_FAULT: fpextovrflt(); - return (TRUE); + return; case T_FLOATING_POINT_ERROR: fpexterrflt(); - return (TRUE); + return; + case T_SSE_FLOAT_ERROR: + fpSSEexterrflt(); + return; + case T_DEBUG: + if ((saved_state->efl & EFL_TF) == 0 + && !kdp_has_active_watchpoints) { + /* We've somehow encountered a debug + * register match that does not belong + * to the kernel 
debugger. + * This isn't supposed to happen. + */ + reset_dr7(); + return; + } + goto debugger_entry; case T_PAGE_FAULT: /* * If the current map is a submap of the kernel map, @@ -264,144 +445,116 @@ kernel_trap( * (vm_map_lookup), we may deadlock on the kernel map * lock. */ -#if MACH_KDB - mp_disable_preemption(); - if (db_active - && kdb_active[cpu_number()] - && !let_ddb_vm_fault) { - /* - * Force kdb to handle this one. - */ - mp_enable_preemption(); - return (FALSE); - } - mp_enable_preemption(); -#endif /* MACH_KDB */ - subcode = regs->cr2; /* get faulting address */ - if (subcode > LINEAR_KERNEL_ADDRESS) { - map = kernel_map; - } else if (thread == THREAD_NULL) - map = kernel_map; - else { - map = thread->map; - } + prot = VM_PROT_READ; + + if (code & T_PF_WRITE) + prot |= VM_PROT_WRITE; +#if PAE + if (code & T_PF_EXECUTE) + prot |= VM_PROT_EXECUTE; +#endif + #if MACH_KDB /* * Check for watchpoint on kernel static data. * vm_fault would fail in this case */ - if (map == kernel_map && - db_watchpoint_list && - db_watchpoints_inserted && - (code & T_PF_WRITE) && - (vm_offset_t)subcode < vm_last_phys && - ((*(pte = pmap_pte(kernel_pmap, (vm_offset_t)subcode))) & - INTEL_PTE_WRITE) == 0) { - *pte = *pte | INTEL_PTE_VALID | INTEL_PTE_WRITE; /* XXX need invltlb here? */ + if (map == kernel_map && db_watchpoint_list && db_watchpoints_inserted && + (code & T_PF_WRITE) && vaddr < vm_map_max(map) && + ((*(pte = pmap_pte(kernel_pmap, (vm_map_offset_t)vaddr))) & INTEL_PTE_WRITE) == 0) { + pmap_store_pte( + pte, + *pte | INTEL_PTE_VALID | INTEL_PTE_WRITE); + /* XXX need invltlb here? */ + result = KERN_SUCCESS; - } else -#endif /* MACH_KDB */ - { - /* - * Since the 386 ignores write protection in - * kernel mode, always try for write permission - * first. If that fails and the fault was a - * read fault, retry with read permission. 
- */ - if (map == kernel_map) { - register struct recovery *rp; - - interruptible = THREAD_UNINT; - for (rp = recover_table; rp < recover_table_end; rp++) { - if (regs->eip == rp->fault_addr) { - interruptible = THREAD_ABORTSAFE; - break; - } - } - } - result = vm_fault(map, - trunc_page((vm_offset_t)subcode), - VM_PROT_READ|VM_PROT_WRITE, - FALSE, - (map == kernel_map) ? interruptible : THREAD_ABORTSAFE, NULL, 0); + goto look_for_watchpoints; } +#endif /* MACH_KDB */ + + result = vm_fault(map, + vm_map_trunc_page(vaddr), + prot, + FALSE, + THREAD_UNINT, NULL, 0); + #if MACH_KDB if (result == KERN_SUCCESS) { - /* Look for watchpoints */ - if (db_watchpoint_list && - db_watchpoints_inserted && - (code & T_PF_WRITE) && - db_find_watchpoint(map, - (vm_offset_t)subcode, regs)) - kdb_trap(T_WATCHPOINT, 0, regs); + /* + * Look for watchpoints + */ +look_for_watchpoints: + if (map == kernel_map && db_watchpoint_list && db_watchpoints_inserted && (code & T_PF_WRITE) && + db_find_watchpoint(map, vaddr, saved_state)) + kdb_trap(T_WATCHPOINT, 0, saved_state); } - else #endif /* MACH_KDB */ - if ((code & T_PF_WRITE) == 0 && - result == KERN_PROTECTION_FAILURE) - { - /* - * Must expand vm_fault by hand, - * so that we can ask for read-only access - * but enter a (kernel)writable mapping. - */ - result = intel_read_fault(map, - trunc_page((vm_offset_t)subcode)); - } if (result == KERN_SUCCESS) { - /* - * Certain faults require that we back up - * the EIP. - */ - register struct recovery *rp; - - for (rp = retry_table; rp < retry_table_end; rp++) { - if (regs->eip == rp->fault_addr) { - regs->eip = rp->recover_addr; - break; + + if (fault_in_copy_window != -1) { + pt_entry_t *updp; + pt_entry_t *kpdp; + + /* + * in case there was no page table assigned + * for the user base address and the pmap + * got 'expanded' due to this fault, we'll + * copy in the descriptor + * + * we're either setting the page table descriptor + * to the same value or it was 0... 
no need + * for a TLB flush in either case + */ + + ml_set_interrupts_enabled(FALSE); + updp = pmap_pde(map->pmap, thread->machine.copy_window[fault_in_copy_window].user_base); + assert(updp); + if (0 == updp) panic("trap: updp 0"); /* XXX DEBUG */ + kpdp = current_cpu_datap()->cpu_copywindow_pdp; + kpdp += fault_in_copy_window; + +#if JOE_DEBUG + if (*kpdp && (*kpdp & PG_FRAME) != (*updp & PG_FRAME)) + panic("kernel_fault: user pdp doesn't match - updp = 0x%x, kpdp = 0x%x\n", updp, kpdp); +#endif + pmap_store_pte(kpdp, *updp); + + (void) ml_set_interrupts_enabled(intr); } - } - return (TRUE); + return; } - - /* fall through */ + /* + * fall through + */ case T_GENERAL_PROTECTION: - /* * If there is a failure recovery address * for this fault, go there. */ - { - register struct recovery *rp; - - for (rp = recover_table; - rp < recover_table_end; - rp++) { - if (regs->eip == rp->fault_addr) { - regs->eip = rp->recover_addr; - return (TRUE); + for (rp = recover_table; rp < recover_table_end; rp++) { + if (kern_ip == rp->fault_addr) { + set_recovery_ip(saved_state, rp->recover_addr); + return; } - } } /* - * Check thread recovery address also - - * v86 assist uses it. + * Check thread recovery address also. */ if (thread->recover) { - regs->eip = thread->recover; - thread->recover = 0; - return (TRUE); + set_recovery_ip(saved_state, thread->recover); + thread->recover = 0; + return; } - /* * Unanticipated page-fault errors in kernel * should not happen. + * + * fall through... */ - /* fall through... */ default: /* @@ -410,71 +563,337 @@ kernel_trap( */ if (type == 15) { kprintf("kernel_trap() ignoring spurious trap 15\n"); - return (TRUE); + return; } - - /* - * ...and return failure, so that locore can call into - * debugger. +debugger_entry: + /* Ensure that the i386_kernel_state at the base of the + * current thread's stack (if any) is synchronized with the + * context at the moment of the trap, to facilitate + * access through the debugger. 
*/ + sync_iss_to_iks(saved_state); +#if MACH_KDB +restart_debugger: +#endif /* MACH_KDB */ #if MACH_KDP - kdp_i386_trap(type, regs, result, regs->cr2); -#endif - return (FALSE); + if (current_debugger != KDB_CUR_DB) { + if (kdp_i386_trap(type, saved_state, result, vaddr)) + return; + } +#endif /* MACH_KDP */ +#if MACH_KDB + else + if (kdb_trap(type, code, saved_state)) { + if (switch_debugger) { + current_debugger = KDP_CUR_DB; + switch_debugger = 0; + goto restart_debugger; + } + return; + } +#endif /* MACH_KDB */ } - return (TRUE); + + panic_trap(saved_state); + /* + * NO RETURN + */ +} + + +static void +set_recovery_ip(x86_saved_state32_t *saved_state, vm_offset_t ip) +{ + saved_state->eip = ip; +} + + +static void +panic_trap(x86_saved_state32_t *regs) +{ + const char *trapname = "Unknown"; + uint32_t cr0 = get_cr0(); + uint32_t cr2 = get_cr2(); + uint32_t cr3 = get_cr3(); + uint32_t cr4 = get_cr4(); + + if (panic_io_port) + (void)inb(panic_io_port); + + kprintf("panic trap number 0x%x, eip 0x%x\n", regs->trapno, regs->eip); + kprintf("cr0 0x%08x cr2 0x%08x cr3 0x%08x cr4 0x%08x\n", + cr0, cr2, cr3, cr4); + + if (regs->trapno < TRAP_TYPES) + trapname = trap_type[regs->trapno]; + + panic("Unresolved kernel trap (CPU %d, Type %d=%s), registers:\n" + "CR0: 0x%08x, CR2: 0x%08x, CR3: 0x%08x, CR4: 0x%08x\n" + "EAX: 0x%08x, EBX: 0x%08x, ECX: 0x%08x, EDX: 0x%08x\n" + "CR2: 0x%08x, EBP: 0x%08x, ESI: 0x%08x, EDI: 0x%08x\n" + "EFL: 0x%08x, EIP: 0x%08x, CS: 0x%08x, DS: 0x%08x\n", + cpu_number(), regs->trapno, trapname, cr0, cr2, cr3, cr4, + regs->eax,regs->ebx,regs->ecx,regs->edx, + regs->cr2,regs->ebp,regs->esi,regs->edi, + regs->efl,regs->eip,regs->cs, regs->ds); + /* + * This next statement is not executed, + * but it's needed to stop the compiler using tail call optimization + * for the panic call - which confuses the subsequent backtrace. + */ + cr0 = 0; } +extern void kprintf_break_lock(void); + + /* - * Called if both kernel_trap() and kdb_trap() fail. 
+ * Called from locore on a special reserved stack after a double-fault + * is taken in kernel space. + * Kernel stack overflow is one route here. */ void -panic_trap( - register struct i386_saved_state *regs) +panic_double_fault(int code) { - int code; - register int type; + struct i386_tss *my_ktss = current_ktss(); + + /* Set postcode (DEBUG only) */ + postcode(PANIC_DOUBLE_FAULT); + +/* Issue an I/O port read if one has been requested - this is an event logic + * analyzers can use as a trigger point. + */ + if (panic_io_port) + (void)inb(panic_io_port); + + /* + * Break kprintf lock in case of recursion, + * and record originally faulted instruction address. + */ + kprintf_break_lock(); + +#if MACH_KDP + /* + * Print backtrace leading to first fault: + */ + panic_i386_backtrace((void *) my_ktss->ebp, 10); +#endif + + panic("Double fault (CPU:%d, thread:%p, code:0x%x)," + "registers:\n" + "CR0: 0x%08x, CR2: 0x%08x, CR3: 0x%08x, CR4: 0x%08x\n" + "EAX: 0x%08x, EBX: 0x%08x, ECX: 0x%08x, EDX: 0x%08x\n" + "ESP: 0x%08x, EBP: 0x%08x, ESI: 0x%08x, EDI: 0x%08x\n" + "EFL: 0x%08x, EIP: 0x%08x\n", + cpu_number(), current_thread(), code, + get_cr0(), get_cr2(), get_cr3(), get_cr4(), + my_ktss->eax, my_ktss->ebx, my_ktss->ecx, my_ktss->edx, + my_ktss->esp, my_ktss->ebp, my_ktss->esi, my_ktss->edi, + my_ktss->eflags, my_ktss->eip); +} + - type = regs->trapno; - code = regs->err; +/* + * Called from locore on a special reserved stack after a machine-check + */ +void +panic_machine_check(int code) +{ + struct i386_tss *my_ktss = current_ktss(); + + /* Set postcode (DEBUG only) */ + postcode(PANIC_MACHINE_CHECK); - printf("trap type %d, code = %x, pc = %x\n", - type, code, regs->eip); - panic("trap"); + /* + * Break kprintf lock in case of recursion, + * and record originally faulted instruction address. 
+ */ + kprintf_break_lock(); + panic("Machine-check (CPU:%d, thread:%p, code:0x%x)," + "registers:\n" + "CR0: 0x%08x, CR2: 0x%08x, CR3: 0x%08x, CR4: 0x%08x\n" + "EAX: 0x%08x, EBX: 0x%08x, ECX: 0x%08x, EDX: 0x%08x\n" + "ESP: 0x%08x, EBP: 0x%08x, ESI: 0x%08x, EDI: 0x%08x\n" + "EFL: 0x%08x, EIP: 0x%08x\n", + cpu_number(), current_thread(), code, + get_cr0(), get_cr2(), get_cr3(), get_cr4(), + my_ktss->eax, my_ktss->ebx, my_ktss->ecx, my_ktss->edx, + my_ktss->esp, my_ktss->ebp, my_ktss->esi, my_ktss->edi, + my_ktss->eflags, my_ktss->eip); } +void +panic_double_fault64(x86_saved_state_t *esp) +{ + /* Set postcode (DEBUG only) */ + postcode(PANIC_DOUBLE_FAULT); + + /* + * Break kprintf lock in case of recursion, + * and record originally faulted instruction address. + */ + kprintf_break_lock(); + + /* + * Dump the interrupt stack frame at last kernel entry. + */ + if (is_saved_state64(esp)) { + x86_saved_state64_t *ss64p = saved_state64(esp); + panic("Double fault (CPU:%d, thread:%p, trapno:0x%x, err:0x%qx)," + "registers:\n" + "CR0: 0x%08x, CR2: 0x%08x, CR3: 0x%08x, CR4: 0x%08x\n" + "RAX: 0x%016qx, RBX: 0x%016qx, RCX: 0x%016qx, RDX: 0x%016qx\n" + "RSP: 0x%016qx, RBP: 0x%016qx, RSI: 0x%016qx, RDI: 0x%016qx\n" + "R8: 0x%016qx, R9: 0x%016qx, R10: 0x%016qx, R11: 0x%016qx\n" + "R12: 0x%016qx, R13: 0x%016qx, R14: 0x%016qx, R15: 0x%016qx\n" + "RFL: 0x%016qx, RIP: 0x%016qx\n", + cpu_number(), current_thread(), ss64p->isf.trapno, ss64p->isf.err, + get_cr0(), get_cr2(), get_cr3(), get_cr4(), + ss64p->rax, ss64p->rbx, ss64p->rcx, ss64p->rdx, + ss64p->isf.rsp, ss64p->rbp, ss64p->rsi, ss64p->rdi, + ss64p->r8, ss64p->r9, ss64p->r10, ss64p->r11, + ss64p->r12, ss64p->r13, ss64p->r14, ss64p->r15, + ss64p->isf.rflags, ss64p->isf.rip); + } else { + x86_saved_state32_t *ss32p = saved_state32(esp); + panic("Double fault (CPU:%d, thread:%p, trapno:0x%x, err:0x%x)," + "registers:\n" + "CR0: 0x%08x, CR2: 0x%08x, CR3: 0x%08x, CR4: 0x%08x\n" + "EAX: 0x%08x, EBX: 0x%08x, ECX: 0x%08x, EDX: 
0x%08x\n" + "ESP: 0x%08x, EBP: 0x%08x, ESI: 0x%08x, EDI: 0x%08x\n" + "EFL: 0x%08x, EIP: 0x%08x\n", + cpu_number(), current_thread(), ss32p->trapno, ss32p->err, + get_cr0(), get_cr2(), get_cr3(), get_cr4(), + ss32p->eax, ss32p->ebx, ss32p->ecx, ss32p->edx, + ss32p->uesp, ss32p->ebp, ss32p->esi, ss32p->edi, + ss32p->efl, ss32p->eip); + } +} + +/* + * Simplistic machine check handler. + * We could peruse all those MSRs but we only dump register state as we do for + * the double fault exception. + * Note: the machine check registers are non-volatile across warm boot - so + * they'll be around when we return. + */ +void +panic_machine_check64(x86_saved_state_t *esp) +{ + /* Set postcode (DEBUG only) */ + postcode(PANIC_MACHINE_CHECK); + + /* + * Break kprintf lock in case of recursion, + * and record originally faulted instruction address. + */ + kprintf_break_lock(); + + /* + * Dump the interrupt stack frame at last kernel entry. + */ + if (is_saved_state64(esp)) { + x86_saved_state64_t *ss64p = saved_state64(esp); + panic("Machine Check (CPU:%d, thread:%p, trapno:0x%x, err:0x%qx)," + "registers:\n" + "CR0: 0x%08x, CR2: 0x%08x, CR3: 0x%08x, CR4: 0x%08x\n" + "RAX: 0x%016qx, RBX: 0x%016qx, RCX: 0x%016qx, RDX: 0x%016qx\n" + "RSP: 0x%016qx, RBP: 0x%016qx, RSI: 0x%016qx, RDI: 0x%016qx\n" + "R8: 0x%016qx, R9: 0x%016qx, R10: 0x%016qx, R11: 0x%016qx\n" + "R12: 0x%016qx, R13: 0x%016qx, R14: 0x%016qx, R15: 0x%016qx\n" + "RFL: 0x%016qx, RIP: 0x%016qx\n", + cpu_number(), current_thread(), ss64p->isf.trapno, ss64p->isf.err, + get_cr0(), get_cr2(), get_cr3(), get_cr4(), + ss64p->rax, ss64p->rbx, ss64p->rcx, ss64p->rdx, + ss64p->isf.rsp, ss64p->rbp, ss64p->rsi, ss64p->rdi, + ss64p->r8, ss64p->r9, ss64p->r10, ss64p->r11, + ss64p->r12, ss64p->r13, ss64p->r14, ss64p->r15, + ss64p->isf.rflags, ss64p->isf.rip); + } else { + x86_saved_state32_t *ss32p = saved_state32(esp); + panic("Machine Check (CPU:%d, thread:%p, trapno:0x%x, err:0x%x)," + "registers:\n" + "CR0: 0x%08x, CR2: 0x%08x, CR3: 
0x%08x, CR4: 0x%08x\n" + "EAX: 0x%08x, EBX: 0x%08x, ECX: 0x%08x, EDX: 0x%08x\n" + "ESP: 0x%08x, EBP: 0x%08x, ESI: 0x%08x, EDI: 0x%08x\n" + "EFL: 0x%08x, EIP: 0x%08x\n", + cpu_number(), current_thread(), ss32p->trapno, ss32p->err, + get_cr0(), get_cr2(), get_cr3(), get_cr4(), + ss32p->eax, ss32p->ebx, ss32p->ecx, ss32p->edx, + ss32p->uesp, ss32p->ebp, ss32p->esi, ss32p->edi, + ss32p->efl, ss32p->eip); + } +} /* * Trap from user mode. */ void user_trap( - register struct i386_saved_state *regs) + x86_saved_state_t *saved_state) { int exc; int code; + int err; unsigned int subcode; - register int type; - vm_map_t map; + int type; + user_addr_t vaddr; vm_prot_t prot; - kern_return_t result; thread_t thread = current_thread(); - boolean_t kernel_act = FALSE; - - if (regs->efl & EFL_VM) { - /* - * If hardware assist can handle exception, - * continue execution. - */ - if (v86_assist(thread, regs)) - return; + ast_t *myast; + boolean_t intr; + kern_return_t kret; + user_addr_t rip; + + assert((is_saved_state32(saved_state) && !thread_is_64bit(thread)) || + (is_saved_state64(saved_state) && thread_is_64bit(thread))); + + if (is_saved_state64(saved_state)) { + x86_saved_state64_t *regs; + + regs = saved_state64(saved_state); + + type = regs->isf.trapno; + err = regs->isf.err & 0xffff; + vaddr = (user_addr_t)regs->cr2; + rip = (user_addr_t)regs->isf.rip; + } else { + x86_saved_state32_t *regs; + + regs = saved_state32(saved_state); + + type = regs->trapno; + err = regs->err & 0xffff; + vaddr = (user_addr_t)regs->cr2; + rip = (user_addr_t)regs->eip; } - type = regs->trapno; + KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_EXCP_UTRAP_x86, type)) | DBG_FUNC_NONE, + (int)(vaddr>>32), (int)vaddr, (int)(rip>>32), (int)rip, 0); + code = 0; subcode = 0; exc = 0; +#if DEBUG_TRACE + kprintf("user_trap(0x%08x) type=%d vaddr=0x%016llx\n", + saved_state, type, vaddr); +#endif + myast = ast_pending(); + if (perfASTHook) { + if (*myast & AST_CHUD_ALL) { + perfASTHook(type, saved_state, 0, 
0); + } + } else { + *myast &= ~AST_CHUD_ALL; + } + + /* Is there a hook? */ + if (perfTrapHook) { + if (perfTrapHook(type, saved_state, 0, 0) == KERN_SUCCESS) + return; /* If it succeeds, we are done... */ + } + switch (type) { case T_DIVIDE_ERROR: @@ -483,10 +902,37 @@ user_trap( break; case T_DEBUG: - exc = EXC_BREAKPOINT; - code = EXC_I386_SGL; - break; - + { + pcb_t pcb; + unsigned int clear = 0; + /* + * get dr6 and set it in the thread's pcb before + * returning to userland + */ + pcb = thread->machine.pcb; + if (pcb->ids) { + /* + * We can get and set the status register + * in 32-bit mode even on a 64-bit thread + * because the high order bits are not + * used on x86_64 + */ + if (thread_is_64bit(thread)) { + uint32_t dr6; + x86_debug_state64_t *ids = pcb->ids; + dr6 = (uint32_t)ids->dr6; + __asm__ volatile ("movl %%db6, %0" : "=r" (dr6)); + ids->dr6 = dr6; + } else { /* 32 bit thread */ + x86_debug_state32_t *ids = pcb->ids; + __asm__ volatile ("movl %%db6, %0" : "=r" (ids->dr6)); + } + __asm__ volatile ("movl %0, %%db6" : : "r" (clear)); + } + exc = EXC_BREAKPOINT; + code = EXC_I386_SGL; + break; + } case T_INT3: exc = EXC_BREAKPOINT; code = EXC_I386_BPT; @@ -519,67 +965,50 @@ user_trap( case 10: /* invalid TSS == iret with NT flag set */ exc = EXC_BAD_INSTRUCTION; code = EXC_I386_INVTSSFLT; - subcode = regs->err & 0xffff; + subcode = err; break; case T_SEGMENT_NOT_PRESENT: exc = EXC_BAD_INSTRUCTION; code = EXC_I386_SEGNPFLT; - subcode = regs->err & 0xffff; + subcode = err; break; case T_STACK_FAULT: exc = EXC_BAD_INSTRUCTION; code = EXC_I386_STKFLT; - subcode = regs->err & 0xffff; + subcode = err; break; case T_GENERAL_PROTECTION: - if (!(regs->efl & EFL_VM)) { - if (check_io_fault(regs)) - return; - } exc = EXC_BAD_INSTRUCTION; code = EXC_I386_GPFLT; - subcode = regs->err & 0xffff; + subcode = err; break; case T_PAGE_FAULT: - subcode = regs->cr2; - prot = VM_PROT_READ|VM_PROT_WRITE; - if (kernel_act == FALSE) { - if (!(regs->err & T_PF_WRITE)) - prot = 
VM_PROT_READ; - (void) user_page_fault_continue(vm_fault(thread->map, - trunc_page((vm_offset_t)subcode), - prot, - FALSE, - THREAD_ABORTSAFE, NULL, 0)); - /* NOTREACHED */ - } - else { - if (subcode > LINEAR_KERNEL_ADDRESS) { - map = kernel_map; - } - result = vm_fault(thread->map, - trunc_page((vm_offset_t)subcode), - prot, - FALSE, - (map == kernel_map) ? THREAD_UNINT : THREAD_ABORTSAFE, NULL, 0); - if ((result != KERN_SUCCESS) && (result != KERN_ABORTED)) { - /* - * Must expand vm_fault by hand, - * so that we can ask for read-only access - * but enter a (kernel) writable mapping. - */ - result = intel_read_fault(thread->map, - trunc_page((vm_offset_t)subcode)); - } - user_page_fault_continue(result); - /*NOTREACHED*/ - } + prot = VM_PROT_READ; + + if (err & T_PF_WRITE) + prot |= VM_PROT_WRITE; +#if PAE + if (err & T_PF_EXECUTE) + prot |= VM_PROT_EXECUTE; +#endif + kret = vm_fault(thread->map, vm_map_trunc_page(vaddr), + prot, FALSE, + THREAD_ABORTSAFE, NULL, 0); + + user_page_fault_continue(kret); + + /* NOTREACHED */ break; + case T_SSE_FLOAT_ERROR: + fpSSEexterrflt(); + return; + + case T_FLOATING_POINT_ERROR: fpexterrflt(); return; @@ -590,427 +1019,25 @@ user_trap( return; #endif /* MACH_KGDB */ #if MACH_KDB - if (kdb_trap(type, regs->err, regs)) + if (kdb_trap(type, err, saved_state)) return; #endif /* MACH_KDB */ - printf("user trap type %d, code = %x, pc = %x\n", - type, regs->err, regs->eip); panic("user trap"); return; } - -#if MACH_KDB - if (debug_all_traps_with_kdb && - kdb_trap(type, regs->err, regs)) - return; -#endif /* MACH_KDB */ + intr = ml_set_interrupts_enabled(FALSE); + myast = ast_pending(); + while (*myast & AST_ALL) { + ast_taken(AST_ALL, intr); + ml_set_interrupts_enabled(FALSE); + myast = ast_pending(); + } + ml_set_interrupts_enabled(intr); i386_exception(exc, code, subcode); /*NOTREACHED*/ } -/* - * V86 mode assist for interrupt handling. 
- */ -boolean_t v86_assist_on = TRUE; -boolean_t v86_unsafe_ok = FALSE; -boolean_t v86_do_sti_cli = TRUE; -boolean_t v86_do_sti_immediate = FALSE; - -#define V86_IRET_PENDING 0x4000 - -int cli_count = 0; -int sti_count = 0; - -boolean_t -v86_assist( - thread_t thread, - register struct i386_saved_state *regs) -{ - register struct v86_assist_state *v86 = &thread->machine.pcb->ims.v86s; - -/* - * Build an 8086 address. Use only when off is known to be 16 bits. - */ -#define Addr8086(seg,off) ((((seg) & 0xffff) << 4) + (off)) - -#define EFL_V86_SAFE ( EFL_OF | EFL_DF | EFL_TF \ - | EFL_SF | EFL_ZF | EFL_AF \ - | EFL_PF | EFL_CF ) - struct iret_32 { - int eip; - int cs; - int eflags; - }; - struct iret_16 { - unsigned short ip; - unsigned short cs; - unsigned short flags; - }; - union iret_struct { - struct iret_32 iret_32; - struct iret_16 iret_16; - }; - - struct int_vec { - unsigned short ip; - unsigned short cs; - }; - - if (!v86_assist_on) - return FALSE; - - /* - * If delayed STI pending, enable interrupts. - * Turn off tracing if on only to delay STI. - */ - if (v86->flags & V86_IF_PENDING) { - v86->flags &= ~V86_IF_PENDING; - v86->flags |= EFL_IF; - if ((v86->flags & EFL_TF) == 0) - regs->efl &= ~EFL_TF; - } - - if (regs->trapno == T_DEBUG) { - - if (v86->flags & EFL_TF) { - /* - * Trace flag was also set - it has priority - */ - return FALSE; /* handle as single-step */ - } - /* - * Fall through to check for interrupts. - */ - } - else if (regs->trapno == T_GENERAL_PROTECTION) { - /* - * General protection error - must be an 8086 instruction - * to emulate. - */ - register int eip; - boolean_t addr_32 = FALSE; - boolean_t data_32 = FALSE; - int io_port; - - /* - * Set up error handler for bad instruction/data - * fetches. 
- */ - __asm__("movl $(addr_error), %0" : : "m" (thread->recover)); - - eip = regs->eip; - while (TRUE) { - unsigned char opcode; - - if (eip > 0xFFFF) { - thread->recover = 0; - return FALSE; /* GP fault: IP out of range */ - } - - opcode = *(unsigned char *)Addr8086(regs->cs,eip); - eip++; - switch (opcode) { - case 0xf0: /* lock */ - case 0xf2: /* repne */ - case 0xf3: /* repe */ - case 0x2e: /* cs */ - case 0x36: /* ss */ - case 0x3e: /* ds */ - case 0x26: /* es */ - case 0x64: /* fs */ - case 0x65: /* gs */ - /* ignore prefix */ - continue; - - case 0x66: /* data size */ - data_32 = TRUE; - continue; - - case 0x67: /* address size */ - addr_32 = TRUE; - continue; - - case 0xe4: /* inb imm */ - case 0xe5: /* inw imm */ - case 0xe6: /* outb imm */ - case 0xe7: /* outw imm */ - io_port = *(unsigned char *)Addr8086(regs->cs, eip); - eip++; - goto do_in_out; - - case 0xec: /* inb dx */ - case 0xed: /* inw dx */ - case 0xee: /* outb dx */ - case 0xef: /* outw dx */ - case 0x6c: /* insb */ - case 0x6d: /* insw */ - case 0x6e: /* outsb */ - case 0x6f: /* outsw */ - io_port = regs->edx & 0xffff; - - do_in_out: - if (!data_32) - opcode |= 0x6600; /* word IO */ - - switch (emulate_io(regs, opcode, io_port)) { - case EM_IO_DONE: - /* instruction executed */ - break; - case EM_IO_RETRY: - /* port mapped, retry instruction */ - thread->recover = 0; - return TRUE; - case EM_IO_ERROR: - /* port not mapped */ - thread->recover = 0; - return FALSE; - } - break; - - case 0xfa: /* cli */ - if (!v86_do_sti_cli) { - thread->recover = 0; - return (FALSE); - } - - v86->flags &= ~EFL_IF; - /* disable simulated interrupts */ - cli_count++; - break; - - case 0xfb: /* sti */ - if (!v86_do_sti_cli) { - thread->recover = 0; - return (FALSE); - } - - if ((v86->flags & EFL_IF) == 0) { - if (v86_do_sti_immediate) { - v86->flags |= EFL_IF; - } else { - v86->flags |= V86_IF_PENDING; - regs->efl |= EFL_TF; - } - /* single step to set IF next inst. 
*/ - } - sti_count++; - break; - - case 0x9c: /* pushf */ - { - int flags; - vm_offset_t sp; - unsigned int size; - - flags = regs->efl; - if ((v86->flags & EFL_IF) == 0) - flags &= ~EFL_IF; - - if ((v86->flags & EFL_TF) == 0) - flags &= ~EFL_TF; - else flags |= EFL_TF; - - sp = regs->uesp; - if (!addr_32) - sp &= 0xffff; - else if (sp > 0xffff) - goto stack_error; - size = (data_32) ? 4 : 2; - if (sp < size) - goto stack_error; - sp -= size; - if (copyout((char *)&flags, - (user_addr_t)Addr8086(regs->ss,sp), - size)) - goto addr_error; - if (addr_32) - regs->uesp = sp; - else - regs->uesp = (regs->uesp & 0xffff0000) | sp; - break; - } - - case 0x9d: /* popf */ - { - vm_offset_t sp; - int nflags; - - sp = regs->uesp; - if (!addr_32) - sp &= 0xffff; - else if (sp > 0xffff) - goto stack_error; - - if (data_32) { - if (sp > 0xffff - sizeof(int)) - goto stack_error; - nflags = *(int *)Addr8086(regs->ss,sp); - sp += sizeof(int); - } - else { - if (sp > 0xffff - sizeof(short)) - goto stack_error; - nflags = *(unsigned short *) - Addr8086(regs->ss,sp); - sp += sizeof(short); - } - if (addr_32) - regs->uesp = sp; - else - regs->uesp = (regs->uesp & 0xffff0000) | sp; - - if (v86->flags & V86_IRET_PENDING) { - v86->flags = nflags & (EFL_TF | EFL_IF); - v86->flags |= V86_IRET_PENDING; - } else { - v86->flags = nflags & (EFL_TF | EFL_IF); - } - regs->efl = (regs->efl & ~EFL_V86_SAFE) - | (nflags & EFL_V86_SAFE); - break; - } - case 0xcf: /* iret */ - { - vm_offset_t sp; - int nflags; - union iret_struct iret_struct; - - v86->flags &= ~V86_IRET_PENDING; - sp = regs->uesp; - if (!addr_32) - sp &= 0xffff; - else if (sp > 0xffff) - goto stack_error; - - if (data_32) { - if (sp > 0xffff - sizeof(struct iret_32)) - goto stack_error; - iret_struct.iret_32 = - *(struct iret_32 *) Addr8086(regs->ss,sp); - sp += sizeof(struct iret_32); - } - else { - if (sp > 0xffff - sizeof(struct iret_16)) - goto stack_error; - iret_struct.iret_16 = - *(struct iret_16 *) Addr8086(regs->ss,sp); - sp += 
sizeof(struct iret_16); - } - if (addr_32) - regs->uesp = sp; - else - regs->uesp = (regs->uesp & 0xffff0000) | sp; - - if (data_32) { - eip = iret_struct.iret_32.eip; - regs->cs = iret_struct.iret_32.cs & 0xffff; - nflags = iret_struct.iret_32.eflags; - } - else { - eip = iret_struct.iret_16.ip; - regs->cs = iret_struct.iret_16.cs; - nflags = iret_struct.iret_16.flags; - } - - v86->flags = nflags & (EFL_TF | EFL_IF); - regs->efl = (regs->efl & ~EFL_V86_SAFE) - | (nflags & EFL_V86_SAFE); - break; - } - default: - /* - * Instruction not emulated here. - */ - thread->recover = 0; - return FALSE; - } - break; /* exit from 'while TRUE' */ - } - regs->eip = (regs->eip & 0xffff0000) | eip; - } - else { - /* - * Not a trap we handle. - */ - thread->recover = 0; - return FALSE; - } - - if ((v86->flags & EFL_IF) && ((v86->flags & V86_IRET_PENDING)==0)) { - - struct v86_interrupt_table *int_table; - int int_count; - int vec; - int i; - - int_table = (struct v86_interrupt_table *) v86->int_table; - int_count = v86->int_count; - - vec = 0; - for (i = 0; i < int_count; int_table++, i++) { - if (!int_table->mask && int_table->count > 0) { - int_table->count--; - vec = int_table->vec; - break; - } - } - if (vec != 0) { - /* - * Take this interrupt - */ - vm_offset_t sp; - struct iret_16 iret_16; - struct int_vec int_vec; - - sp = regs->uesp & 0xffff; - if (sp < sizeof(struct iret_16)) - goto stack_error; - sp -= sizeof(struct iret_16); - iret_16.ip = regs->eip; - iret_16.cs = regs->cs; - iret_16.flags = regs->efl & 0xFFFF; - if ((v86->flags & EFL_TF) == 0) - iret_16.flags &= ~EFL_TF; - else iret_16.flags |= EFL_TF; - - (void) memcpy((char *) &int_vec, - (char *) (sizeof(struct int_vec) * vec), - sizeof (struct int_vec)); - if (copyout((char *)&iret_16, - (user_addr_t)Addr8086(regs->ss,sp), - sizeof(struct iret_16))) - goto addr_error; - regs->uesp = (regs->uesp & 0xFFFF0000) | (sp & 0xffff); - regs->eip = int_vec.ip; - regs->cs = int_vec.cs; - regs->efl &= ~EFL_TF; - v86->flags 
&= ~(EFL_IF | EFL_TF); - v86->flags |= V86_IRET_PENDING; - } - } - - thread->recover = 0; - return TRUE; - - /* - * On address error, report a page fault. - * XXX report GP fault - we don`t save - * the faulting address. - */ - addr_error: - __asm__("addr_error:;"); - thread->recover = 0; - return FALSE; - - /* - * On stack address error, return stack fault (12). - */ - stack_error: - thread->recover = 0; - regs->trapno = T_STACK_FAULT; - return FALSE; -} /* * Handle AST traps for i386. @@ -1023,45 +1050,17 @@ extern void log_thread_action (thread_t, char *); void i386_astintr(int preemption) { - ast_t *my_ast, mask = AST_ALL; + ast_t mask = AST_ALL; spl_t s; - s = splsched(); /* block interrupts to check reasons */ - mp_disable_preemption(); - my_ast = ast_pending(); - if (*my_ast & AST_I386_FP) { - /* - * AST was for delayed floating-point exception - - * FP interrupt occurred while in kernel. - * Turn off this AST reason and handle the FPU error. - */ - - ast_off(AST_I386_FP); - mp_enable_preemption(); - splx(s); - - fpexterrflt(); - } - else { - /* - * Not an FPU trap. Handle the AST. - * Interrupts are still blocked. - */ - -#if 1 - if (preemption) { - mask = AST_PREEMPTION; - mp_enable_preemption(); - } else { - mp_enable_preemption(); - } -#else - mp_enable_preemption(); -#endif + if (preemption) + mask = AST_PREEMPTION; + + s = splsched(); ast_taken(mask, s); - } + splx(s); } /* @@ -1080,133 +1079,68 @@ i386_exception( int code, int subcode) { - spl_t s; exception_data_type_t codes[EXCEPTION_CODE_MAX]; - /* - * Turn off delayed FPU error handling. 
- */ - s = splsched(); - mp_disable_preemption(); - ast_off(AST_I386_FP); - mp_enable_preemption(); - splx(s); - codes[0] = code; /* new exception interface */ codes[1] = subcode; exception_triage(exc, codes, 2); /*NOTREACHED*/ } -boolean_t -check_io_fault( - struct i386_saved_state *regs) + +void +kernel_preempt_check(void) { - int eip, opcode, io_port; - boolean_t data_16 = FALSE; + ast_t *myast; + boolean_t intr; /* - * Get the instruction. + * disable interrupts to both prevent pre-emption + * and to keep the ast state from changing via + * an interrupt handler making something runnable */ - eip = regs->eip; - - for (;;) { - opcode = inst_fetch(eip, regs->cs); - eip++; - switch (opcode) { - case 0x66: /* data-size prefix */ - data_16 = TRUE; - continue; - - case 0xf3: /* rep prefix */ - case 0x26: /* es */ - case 0x2e: /* cs */ - case 0x36: /* ss */ - case 0x3e: /* ds */ - case 0x64: /* fs */ - case 0x65: /* gs */ - continue; - - case 0xE4: /* inb imm */ - case 0xE5: /* inl imm */ - case 0xE6: /* outb imm */ - case 0xE7: /* outl imm */ - /* port is immediate byte */ - io_port = inst_fetch(eip, regs->cs); - eip++; - break; - - case 0xEC: /* inb dx */ - case 0xED: /* inl dx */ - case 0xEE: /* outb dx */ - case 0xEF: /* outl dx */ - case 0x6C: /* insb */ - case 0x6D: /* insl */ - case 0x6E: /* outsb */ - case 0x6F: /* outsl */ - /* port is in DX register */ - io_port = regs->edx & 0xFFFF; - break; - - default: - return FALSE; - } - break; - } - - if (data_16) - opcode |= 0x6600; /* word IO */ - - switch (emulate_io(regs, opcode, io_port)) { - case EM_IO_DONE: - /* instruction executed */ - regs->eip = eip; - return TRUE; + intr = ml_set_interrupts_enabled(FALSE); - case EM_IO_RETRY: - /* port mapped, retry instruction */ - return TRUE; - - case EM_IO_ERROR: - /* port not mapped */ - return FALSE; - } - return FALSE; -} + myast = ast_pending(); -void -kernel_preempt_check (void) -{ - ast_t *myast; + if ((*myast & AST_URGENT) && intr == TRUE && get_interrupt_level() 
== 0) { + /* + * can handle interrupts and preemptions + * at this point + */ + ml_set_interrupts_enabled(intr); - mp_disable_preemption(); - myast = ast_pending(); - if ((*myast & AST_URGENT) && - get_interrupt_level() == 1 - ) { - mp_enable_preemption_no_check(); - __asm__ volatile (" int $0xff"); - } else { - mp_enable_preemption_no_check(); + /* + * now cause the PRE-EMPTION trap + */ + __asm__ volatile (" int $0xff"); + } else { + /* + * if interrupts were already disabled or + * we're in an interrupt context, we can't + * preempt... of course if AST_URGENT + * isn't set we also don't want to + */ + ml_set_interrupts_enabled(intr); } } #if MACH_KDB -extern void db_i386_state(struct i386_saved_state *regs); +extern void db_i386_state(x86_saved_state32_t *regs); #include void db_i386_state( - struct i386_saved_state *regs) + x86_saved_state32_t *regs) { db_printf("eip %8x\n", regs->eip); db_printf("trap %8x\n", regs->trapno); db_printf("err %8x\n", regs->err); db_printf("efl %8x\n", regs->efl); db_printf("ebp %8x\n", regs->ebp); - db_printf("esp %8x\n", regs->esp); + db_printf("esp %8x\n", regs->cr2); db_printf("uesp %8x\n", regs->uesp); db_printf("cs %8x\n", regs->cs & 0xff); db_printf("ds %8x\n", regs->ds & 0xff); @@ -1223,3 +1157,85 @@ db_i386_state( } #endif /* MACH_KDB */ + +/* Synchronize a thread's i386_kernel_state (if any) with the given + * i386_saved_state_t obtained from the trap/IPI handler; called in + * kernel_trap() prior to entering the debugger, and when receiving + * an "MP_KDP" IPI. + */ + +void +sync_iss_to_iks(x86_saved_state32_t *saved_state) +{ + struct x86_kernel_state32 *iks; + vm_offset_t kstack; + boolean_t record_active_regs = FALSE; + + if ((kstack = current_thread()->kernel_stack) != 0) { + x86_saved_state32_t *regs; + + regs = saved_state; + + iks = STACK_IKS(kstack); + + /* + * Did we take the trap/interrupt in kernel mode? 
+ */ + if (regs == USER_REGS32(current_thread())) + record_active_regs = TRUE; + else { + iks->k_ebx = regs->ebx; + iks->k_esp = (int)regs; + iks->k_ebp = regs->ebp; + iks->k_edi = regs->edi; + iks->k_esi = regs->esi; + iks->k_eip = regs->eip; + } + } + + if (record_active_regs == TRUE) { + /* + * Show the trap handler path + */ + __asm__ volatile("movl %%ebx, %0" : "=m" (iks->k_ebx)); + __asm__ volatile("movl %%esp, %0" : "=m" (iks->k_esp)); + __asm__ volatile("movl %%ebp, %0" : "=m" (iks->k_ebp)); + __asm__ volatile("movl %%edi, %0" : "=m" (iks->k_edi)); + __asm__ volatile("movl %%esi, %0" : "=m" (iks->k_esi)); + /* + * "Current" instruction pointer + */ + __asm__ volatile("movl $1f, %0\n1:" : "=m" (iks->k_eip)); + } +} + +/* + * This is used by the NMI interrupt handler (from mp.c) to + * unconditionally sync the trap handler context to the IKS + * irrespective of whether the NMI was fielded in kernel + * or user space. + */ +void +sync_iss_to_iks_unconditionally(__unused x86_saved_state32_t *saved_state) { + struct x86_kernel_state32 *iks; + vm_offset_t kstack; + boolean_t record_active_regs = FALSE; + + if ((kstack = current_thread()->kernel_stack) != 0) { + + iks = STACK_IKS(kstack); + /* + * Show the trap handler path + */ + __asm__ volatile("movl %%ebx, %0" : "=m" (iks->k_ebx)); + __asm__ volatile("movl %%esp, %0" : "=m" (iks->k_esp)); + __asm__ volatile("movl %%ebp, %0" : "=m" (iks->k_ebp)); + __asm__ volatile("movl %%edi, %0" : "=m" (iks->k_edi)); + __asm__ volatile("movl %%esi, %0" : "=m" (iks->k_esi)); + /* + * "Current" instruction pointer + */ + __asm__ volatile("movl $1f, %0\n1:" : "=m" (iks->k_eip)); + + } +} diff --git a/osfmk/i386/trap.h b/osfmk/i386/trap.h index f3563e0ba..44eded817 100644 --- a/osfmk/i386/trap.h +++ b/osfmk/i386/trap.h @@ -74,6 +74,8 @@ /* 15 */ #define T_FLOATING_POINT_ERROR 16 #define T_WATCHPOINT 17 +#define T_MACHINE_CHECK 18 +#define T_SSE_FLOAT_ERROR 19 #define T_PREEMPT 255 #define TRAP_NAMES "divide error", "debug trap", 
"NMI", "breakpoint", \ @@ -81,7 +83,7 @@ "no coprocessor", "double fault", "coprocessor overrun", \ "invalid TSS", "segment not present", "stack bounds", \ "general protection", "page fault", "(reserved)", \ - "coprocessor error", "watchpoint" + "coprocessor error", "watchpoint", "(reserved)", "SSE floating point" /* * Page-fault trap codes. @@ -90,6 +92,12 @@ #define T_PF_WRITE 0x2 /* write access */ #define T_PF_USER 0x4 /* from user state */ +#ifdef PAE +#define T_PF_RSVD 0x8 /* reserved bit set to 1 */ +#define T_PF_EXECUTE 0x10 /* instruction fetch when NX */ +#endif + + #if !defined(ASSEMBLER) && defined(MACH_KERNEL) #include @@ -99,23 +107,42 @@ extern void i386_exception( int code, int subcode); -extern boolean_t kernel_trap( - struct i386_saved_state *regs); +extern void sync_iss_to_iks(x86_saved_state32_t *regs); + +extern void sync_iss_to_iks_unconditionally(x86_saved_state32_t *regs); + +extern void kernel_trap(x86_saved_state_t *regs); + +extern void user_trap(x86_saved_state_t *regs); -extern void panic_trap( - struct i386_saved_state *regs); +extern void panic_double_fault(int code); -extern void user_trap( - struct i386_saved_state *regs); +extern void panic_double_fault64(x86_saved_state_t *regs); + +extern void panic_machine_check(int code); + +extern void panic_machine_check64(x86_saved_state_t *regs); extern void i386_astintr(int preemption); -#if defined(MACH_KDP) -extern void kdp_i386_trap( + +typedef kern_return_t (*perfCallback)( + int trapno, + void *regs, + int unused1, + int unused2); + +extern perfCallback perfTrapHook; +extern perfCallback perfASTHook; +extern perfCallback perfIntHook; + +#if MACH_KDP +extern boolean_t kdp_i386_trap( unsigned int, - struct i386_saved_state *, + x86_saved_state32_t *, kern_return_t, vm_offset_t); +extern void panic_i386_backtrace(void *, int); #endif /* MACH_KDP */ #endif /* !ASSEMBLER && MACH_KERNEL */ diff --git a/osfmk/i386/tsc.c b/osfmk/i386/tsc.c new file mode 100644 index 000000000..d299aff34 --- 
/dev/null +++ b/osfmk/i386/tsc.c @@ -0,0 +1,215 @@ +/* + * Copyright (c) 2005-2006 Apple Computer, Inc. All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * The contents of this file constitute Original Code as defined in and + * are subject to the Apple Public Source License Version 1.1 (the + * "License"). You may not use this file except in compliance with the + * License. Please obtain a copy of the License at + * http://www.apple.com/publicsource and read it before using this file. + * + * This Original Code and all software distributed under the License are + * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the + * License for the specific language governing rights and limitations + * under the License. + * + * @APPLE_LICENSE_HEADER_END@ + */ +/* + * @OSF_COPYRIGHT@ + */ + +/* + * File: i386/tsc.c + * Purpose: Initializes the TSC and the various conversion + * factors needed by other parts of the system. 
+ */ + +#include +#include + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include /* for kernel_map */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +uint64_t busFCvtt2n = 0; +uint64_t busFCvtn2t = 0; +uint64_t tscFreq = 0; +uint64_t tscFCvtt2n = 0; +uint64_t tscFCvtn2t = 0; +uint64_t tscGranularity = 0; +uint64_t bus2tsc = 0; + +/* Decimal powers: */ +#define kilo (1000ULL) +#define Mega (kilo * kilo) +#define Giga (kilo * Mega) +#define Tera (kilo * Giga) +#define Peta (kilo * Tera) + +static const char FSB_Frequency_prop[] = "FSBFrequency"; +/* + * This routine extracts the front-side bus frequency in Hz from + * the device tree. + */ +static uint64_t +EFI_FSB_frequency(void) +{ + uint64_t frequency = 0; + DTEntry entry; + void *value; + int size; + + if (DTLookupEntry(0, "/efi/platform", &entry) != kSuccess) { + kprintf("EFI_FSB_frequency: didn't find /efi/platform\n"); + return 0; + } + if (DTGetProperty(entry,FSB_Frequency_prop,&value,&size) != kSuccess) { + kprintf("EFI_FSB_frequency: property %s not found\n", FSB_Frequency_prop); /* %s needs its argument */ + return 0; + } + if (size == sizeof(uint64_t)) { + frequency = *(uint64_t *) value; + kprintf("EFI_FSB_frequency: read %s value: %llu\n", + FSB_Frequency_prop, frequency); + if (!(90*Mega < frequency && frequency < 10*Giga)) { + kprintf("EFI_FSB_frequency: value out of range\n"); + frequency = 0; + } + } else { + kprintf("EFI_FSB_frequency: unexpected size %d\n", size); + } + return frequency; +} + +/* + * Initialize the various conversion factors needed by code referencing + * the TSC. + */ +void +tsc_init(void) +{ + uint64_t busFreq; + uint64_t busFCvtInt; + uint32_t cpuModel; + uint32_t cpuFamily; + uint32_t xcpuid[4]; + + /* + * Get the FSB frequency and conversion factors. 
+ */ + busFreq = EFI_FSB_frequency(); + if (busFreq != 0) { + busFCvtt2n = ((1 * Giga) << 32) / busFreq; + busFCvtn2t = 0xFFFFFFFFFFFFFFFFULL / busFCvtt2n; + busFCvtInt = tmrCvt(1 * Peta, 0xFFFFFFFFFFFFFFFFULL / busFreq); + } else { + panic("rtclock_init: EFI not supported!\n"); + } + + kprintf(" BUS: Frequency = %6d.%04dMHz, " + "cvtt2n = %08X.%08X, cvtn2t = %08X.%08X, " + "cvtInt = %08X.%08X\n", + (uint32_t)(busFreq / Mega), + (uint32_t)(busFreq % Mega), + (uint32_t)(busFCvtt2n >> 32), (uint32_t)busFCvtt2n, + (uint32_t)(busFCvtn2t >> 32), (uint32_t)busFCvtn2t, + (uint32_t)(busFCvtInt >> 32), (uint32_t)busFCvtInt); + + do_cpuid(1, xcpuid); + cpuFamily = ( xcpuid[eax] >> 8 ) & 0xf; + /* + * Get the extended family if necessary. + */ + if (cpuFamily == 0x0f) + cpuFamily += (xcpuid[eax] >> 20) & 0x00ff; + + cpuModel = ( xcpuid[eax] >> 4 ) & 0xf; + /* + * Get the extended model if necessary. + */ + if (cpuFamily == CPUID_FAMILY_686 + || cpuFamily == CPUID_FAMILY_EXTENDED) + cpuModel += ((xcpuid[eax] >> 16) & 0xf) << 4; + + /* + * Get the TSC increment. The TSC is incremented by this + * on every bus tick. Calculate the TSC conversion factors + * to and from nano-seconds. 
+ */ + if (cpuFamily == CPUID_FAMILY_686) { + if (cpuModel == CPUID_MODEL_CORE || cpuModel == CPUID_MODEL_CORE2) { + uint64_t prfsts; + + prfsts = rdmsr64(IA32_PERF_STS); + tscGranularity = (uint32_t)(prfsts >> BusRatioShift) & BusRatioMask; + } else { + panic("rtclock_init: unknown CPU model: 0x%X\n", + cpuModel); + } + } else { + panic("rtclock_init: unknown CPU family: 0x%X\n", + cpuFamily); + } + + tscFCvtt2n = busFCvtt2n / (uint64_t)tscGranularity; + tscFreq = ((1 * Giga) << 32) / tscFCvtt2n; + tscFCvtn2t = 0xFFFFFFFFFFFFFFFFULL / tscFCvtt2n; + + kprintf(" TSC: Frequency = %6d.%04dMHz, " + "cvtt2n = %08X.%08X, cvtn2t = %08X.%08X, gran = %d\n", + (uint32_t)(tscFreq / Mega), + (uint32_t)(tscFreq % Mega), + (uint32_t)(tscFCvtt2n >> 32), (uint32_t)tscFCvtt2n, + (uint32_t)(tscFCvtn2t >> 32), (uint32_t)tscFCvtn2t, + (uint32_t)tscGranularity); /* tscGranularity is uint64_t; %d takes an int-sized argument */ + + /* + * Calculate conversion from BUS to TSC + */ + bus2tsc = tmrCvt(busFCvtt2n, tscFCvtn2t); +} + +void +tsc_get_info(tscInfo_t *info) +{ + info->busFCvtt2n = busFCvtt2n; + info->busFCvtn2t = busFCvtn2t; + info->tscFreq = tscFreq; + info->tscFCvtt2n = tscFCvtt2n; + info->tscFCvtn2t = tscFCvtn2t; + info->tscGranularity = tscGranularity; + info->bus2tsc = bus2tsc; +} diff --git a/osfmk/i386/tsc.h b/osfmk/i386/tsc.h new file mode 100644 index 000000000..71d2d6527 --- /dev/null +++ b/osfmk/i386/tsc.h @@ -0,0 +1,66 @@ +/* + * Copyright (c) 2004-2006 Apple Computer, Inc. All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * The contents of this file constitute Original Code as defined in and + * are subject to the Apple Public Source License Version 1.1 (the + * "License"). You may not use this file except in compliance with the + * License. Please obtain a copy of the License at + * http://www.apple.com/publicsource and read it before using this file. 
+ * + * This Original Code and all software distributed under the License are + * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the + * License for the specific language governing rights and limitations + * under the License. + * + * @APPLE_LICENSE_HEADER_END@ + */ +/* + * @OSF_COPYRIGHT@ + */ +/* + * @APPLE_FREE_COPYRIGHT@ + */ +/* + * File: tsc.h + * Purpose: Contains the TSC initialization and conversion + * factors. + */ +#ifdef KERNEL_PRIVATE +#ifndef _I386_TSC_H_ +#define _I386_TSC_H_ + +#define BusRatioShift 40 +#define BusRatioMask 0x1F +#define IA32_PERF_STS 0x198 + +extern uint64_t busFCvtt2n; +extern uint64_t busFCvtn2t; +extern uint64_t tscFreq; +extern uint64_t tscFCvtt2n; +extern uint64_t tscFCvtn2t; +extern uint64_t tscGranularity; +extern uint64_t bus2tsc; + +struct tscInfo +{ +uint64_t busFCvtt2n; +uint64_t busFCvtn2t; +uint64_t tscFreq; +uint64_t tscFCvtt2n; +uint64_t tscFCvtn2t; +uint64_t tscGranularity; +uint64_t bus2tsc; +}; +typedef struct tscInfo tscInfo_t; + +extern void tsc_get_info(tscInfo_t *info); + +extern void tsc_init(void); + +#endif /* _I386_TSC_H_ */ +#endif /* KERNEL_PRIVATE */ diff --git a/osfmk/i386/tss.h b/osfmk/i386/tss.h index 21604bcd3..85dc5333b 100644 --- a/osfmk/i386/tss.h +++ b/osfmk/i386/tss.h @@ -53,41 +53,75 @@ #ifndef _I386_TSS_H_ #define _I386_TSS_H_ +#include + /* * i386 Task State Segment */ struct i386_tss { - int back_link; /* segment number of previous task, + uint32_t back_link; /* segment number of previous task, if nested */ - int esp0; /* initial stack pointer ... */ - int ss0; /* and segment for ring 0 */ - int esp1; /* initial stack pointer ... */ - int ss1; /* and segment for ring 1 */ - int esp2; /* initial stack pointer ... 
*/ - int ss2; /* and segment for ring 2 */ - int cr3; /* CR3 - page table directory + uint32_t esp0; /* initial stack pointer ... */ + uint32_t ss0; /* and segment for ring 0 */ + uint32_t esp1; /* initial stack pointer ... */ + uint32_t ss1; /* and segment for ring 1 */ + uint32_t esp2; /* initial stack pointer ... */ + uint32_t ss2; /* and segment for ring 2 */ + uint32_t cr3; /* CR3 - page table directory physical address */ - int eip; - int eflags; - int eax; - int ecx; - int edx; - int ebx; - int esp; /* current stack pointer */ - int ebp; - int esi; - int edi; - int es; - int cs; - int ss; /* current stack segment */ - int ds; - int fs; - int gs; - int ldt; /* local descriptor table segment */ - unsigned short trace_trap; /* trap on switch to this task */ - unsigned short io_bit_map_offset; + uint32_t eip; + uint32_t eflags; + uint32_t eax; + uint32_t ecx; + uint32_t edx; + uint32_t ebx; + uint32_t esp; /* current stack pointer */ + uint32_t ebp; + uint32_t esi; + uint32_t edi; + uint32_t es; + uint32_t cs; + uint32_t ss; /* current stack segment */ + uint32_t ds; + uint32_t fs; + uint32_t gs; + uint32_t ldt; /* local descriptor table segment */ + uint16_t trace_trap; /* trap on switch to this task */ + uint16_t io_bit_map_offset; /* offset to start of IO permission bit map */ }; +/* + * Temporary stack used on kernel entry via the sysenter instruction. + * Its top points on to the PCB save area. It must contain space for + * a single interrupt stack frame in case of single-stepping over the sysenter. + * Although this is defined as a 64-bit stack, the space is also used in + * 32-bit legacy mode. For 64-bit the stack is 16-byte aligned. 
+ */ +struct sysenter_stack { + uint64_t stack[16]; /* Space for a 64-bit frame and some */ + uint64_t top; /* Top and pointer to ISS in PCS */ +}; + +struct x86_64_tss { + uint32_t reserved1; + uint64_t rsp0; /* stack pointer for CPL0 */ + uint64_t rsp1; /* stack pointer for CPL1 */ + uint64_t rsp2; /* stack pointer for CPL2 */ + uint32_t reserved2; + uint32_t reserved3; + uint64_t ist1; /* interrupt stack table 1 */ + uint64_t ist2; /* interrupt stack table 2 */ + uint64_t ist3; /* interrupt stack table 3 */ + uint64_t ist4; /* interrupt stack table 4 */ + uint64_t ist5; /* interrupt stack table 5 */ + uint64_t ist6; /* interrupt stack table 6 */ + uint64_t ist7; /* interrupt stack table 7 */ + uint32_t reserved4; + uint32_t reserved5; + uint16_t reserved6; + uint16_t io_bit_map_offset; + /* offset to IO permission bit map */ +}; #endif /* _I386_TSS_H_ */ diff --git a/osfmk/i386/user_ldt.c b/osfmk/i386/user_ldt.c index 9cad6b506..1c735f0d0 100644 --- a/osfmk/i386/user_ldt.c +++ b/osfmk/i386/user_ldt.c @@ -53,7 +53,7 @@ /* * User LDT management. - * Each thread in a task may have its own LDT. + * Each task may have its own LDT. 
*/ #include @@ -65,366 +65,328 @@ #include #include #include +#include +#include +#include +#include +#include -char acc_type[8][3] = { - /* code stack data */ - { 0, 0, 1 }, /* data */ - { 0, 1, 1 }, /* data, writable */ - { 0, 0, 1 }, /* data, expand-down */ - { 0, 1, 1 }, /* data, writable, expand-down */ - { 1, 0, 0 }, /* code */ - { 1, 0, 1 }, /* code, readable */ - { 1, 0, 0 }, /* code, conforming */ - { 1, 0, 1 }, /* code, readable, conforming */ -}; - -#if 0 -/* Forward */ - -extern boolean_t selector_check( - thread_t thread, - int sel, - int type); - -boolean_t -selector_check( - thread_t thread, - int sel, - int type) -{ - struct user_ldt *ldt; - int access; - - ldt = thread->machine.pcb->ims.ldt; - if (ldt == 0) { - switch (type) { - case S_CODE: - return sel == USER_CS; - case S_STACK: - return sel == USER_DS; - case S_DATA: - return sel == 0 || - sel == USER_CS || - sel == USER_DS; - } - } +#include - if (type != S_DATA && sel == 0) - return FALSE; - if ((sel & (SEL_LDTS|SEL_PL)) != (SEL_LDTS|SEL_PL_U) - || sel > ldt->desc.limit_low) - return FALSE; - - access = ldt->ldt[sel_idx(sel)].access; - - if ((access & (ACC_P|ACC_PL|ACC_TYPE_USER)) - != (ACC_P|ACC_PL_U|ACC_TYPE_USER)) - return FALSE; - /* present, pl == pl.user, not system */ - - return acc_type[(access & 0xe)>>1][type]; -} +static void user_ldt_set_action(void *); /* * Add the descriptors to the LDT, starting with * the descriptor for 'first_selector'. 
*/ -kern_return_t +int i386_set_ldt( - thread_t thr_act, - int first_selector, - descriptor_list_t desc_list, - mach_msg_type_number_t count) + int *retval, + uint32_t start_sel, + uint32_t descs, /* out */ + uint32_t num_sels) { - user_ldt_t new_ldt, old_ldt, temp; + user_ldt_t new_ldt, old_ldt; struct real_descriptor *dp; - int i; - int min_selector = 0; - pcb_t pcb; - vm_size_t ldt_size_needed; - int first_desc = sel_idx(first_selector); - vm_map_copy_t old_copy_object; - thread_t thread; - - if (first_desc < min_selector || first_desc > 8191) - return KERN_INVALID_ARGUMENT; - if (first_desc + count >= 8192) - return KERN_INVALID_ARGUMENT; - if (thr_act == THREAD_NULL) - return KERN_INVALID_ARGUMENT; - if ((thread = act_lock_thread(thr_act)) == THREAD_NULL) { - act_unlock_thread(thr_act); - return KERN_INVALID_ARGUMENT; - } - if (thread == current_thread()) - min_selector = LDTSZ; - act_unlock_thread(thr_act); + unsigned int i; + unsigned int min_selector = LDTSZ_MIN; /* do not allow the system selectors to be changed */ + task_t task = current_task(); + unsigned int ldt_count; + kern_return_t err; - /* - * We must copy out desc_list to the kernel map, and wire - * it down (we touch it while the PCB is locked). - * - * We make a copy of the copyin object, and clear - * out the old one, so that the MIG stub will have a - * a empty (but valid) copyin object to discard. 
- */ - { - kern_return_t kr; - vm_map_offset_t dst_addr; - - old_copy_object = (vm_map_copy_t) desc_list; - - kr = vm_map_copyout(ipc_kernel_map, &dst_addr, - vm_map_copy_copy(old_copy_object)); - if (kr != KERN_SUCCESS) - return kr; - - (void) vm_map_wire(ipc_kernel_map, - vm_map_trunc_page(dst_addr), - vm_map_round_page(dst_addr + - count * sizeof(struct real_descriptor)), - VM_PROT_READ|VM_PROT_WRITE, FALSE); - desc_list = CAST_DOWN(descriptor_list_t, dst_addr); - } + if (start_sel != LDT_AUTO_ALLOC + && (start_sel != 0 || num_sels != 0) + && (start_sel < min_selector || start_sel >= LDTSZ)) + return EINVAL; + if (start_sel != LDT_AUTO_ALLOC + && (num_sels > LDTSZ || start_sel + num_sels > LDTSZ)) /* bound num_sels first: the uint32_t sum can wrap */ + return EINVAL; - for (i = 0, dp = (struct real_descriptor *) desc_list; - i < count; - i++, dp++) - { - switch (dp->access & ~ACC_A) { - case 0: - case ACC_P: - /* valid empty descriptor */ - break; - case ACC_P | ACC_CALL_GATE: - /* Mach kernel call */ - *dp = *(struct real_descriptor *) - &ldt[sel_idx(USER_SCALL)]; - break; - case ACC_P | ACC_PL_U | ACC_DATA: - case ACC_P | ACC_PL_U | ACC_DATA_W: - case ACC_P | ACC_PL_U | ACC_DATA_E: - case ACC_P | ACC_PL_U | ACC_DATA_EW: - case ACC_P | ACC_PL_U | ACC_CODE: - case ACC_P | ACC_PL_U | ACC_CODE_R: - case ACC_P | ACC_PL_U | ACC_CODE_C: - case ACC_P | ACC_PL_U | ACC_CODE_CR: - case ACC_P | ACC_PL_U | ACC_CALL_GATE_16: - case ACC_P | ACC_PL_U | ACC_CALL_GATE: - break; - default: - (void) vm_map_remove(ipc_kernel_map, - vm_map_trunc_page(desc_list), - vm_map_round_page(&desc_list[count]), - VM_MAP_REMOVE_KUNWIRE); - return KERN_INVALID_ARGUMENT; + task_lock(task); + + old_ldt = task->i386_ldt; + + if (start_sel == LDT_AUTO_ALLOC) { + if (old_ldt) { + unsigned int null_count; + struct real_descriptor null_ldt; + + bzero(&null_ldt, sizeof(null_ldt)); + + /* + * Look for null selectors among the already-allocated + * entries. 
+ */ + null_count = 0; + i = 0; + while (i < old_ldt->count) + { + if (!memcmp(&old_ldt->ldt[i++], &null_ldt, sizeof(null_ldt))) { + null_count++; + if (null_count == num_sels) + break; /* break out of while loop */ + } else { + null_count = 0; + } + } + + /* + * If we broke out of the while loop, i points to the selector + * after num_sels null selectors. Otherwise it points to the end + * of the old LDTs, and null_count is the number of null selectors + * at the end. + * + * Either way, there are null_count null selectors just prior to + * the i-indexed selector, and either null_count >= num_sels, + * or we're at the end, so we can extend. + */ + start_sel = old_ldt->start + i - null_count; + } else { + start_sel = LDTSZ_MIN; + } + + if (num_sels > LDTSZ || start_sel + num_sels > LDTSZ) { /* bound num_sels first: the uint32_t sum can wrap */ + task_unlock(task); + return ENOMEM; } } - ldt_size_needed = sizeof(struct real_descriptor) - * (first_desc + count); - - pcb = thr_act->machine.pcb; - new_ldt = 0; - Retry: - simple_lock(&pcb->lock); - old_ldt = pcb->ims.ldt; - if (old_ldt == 0 || - old_ldt->desc.limit_low + 1 < ldt_size_needed) - { + + if (start_sel == 0 && num_sels == 0) { + new_ldt = NULL; + } else { /* - * No old LDT, or not big enough + * Allocate new LDT */ - if (new_ldt == 0) { - simple_unlock(&pcb->lock); - - new_ldt = (user_ldt_t) kalloc(ldt_size_needed - + sizeof(struct real_descriptor)); - new_ldt->desc.limit_low = ldt_size_needed - 1; - new_ldt->desc.limit_high = 0; - new_ldt->desc.base_low = - ((vm_offset_t)&new_ldt->ldt[0]) & 0xffff; - new_ldt->desc.base_med = - (((vm_offset_t)&new_ldt->ldt[0]) >> 16) - & 0xff; - new_ldt->desc.base_high = - ((vm_offset_t)&new_ldt->ldt[0]) >> 24; - new_ldt->desc.access = ACC_P | ACC_LDT; - new_ldt->desc.granularity = 0; - - goto Retry; + + unsigned int begin_sel = start_sel; + unsigned int end_sel = begin_sel + num_sels; + + if (old_ldt != NULL) { + if (old_ldt->start < begin_sel) + begin_sel = old_ldt->start; + if 
(old_ldt->start + old_ldt->count > end_sel) + end_sel = 
old_ldt->start + old_ldt->count; + } + + ldt_count = end_sel - begin_sel; + + new_ldt = (user_ldt_t)kalloc(sizeof(struct user_ldt) + (ldt_count * sizeof(struct real_descriptor))); + if (new_ldt == NULL) { + task_unlock(task); + return ENOMEM; } + new_ldt->start = begin_sel; + new_ldt->count = ldt_count; + /* * Have new LDT. If there was a an old ldt, copy descriptors - * from old to new. Otherwise copy the default ldt. + * from old to new. */ if (old_ldt) { - bcopy((char *)&old_ldt->ldt[0], - (char *)&new_ldt->ldt[0], - old_ldt->desc.limit_low + 1); + bcopy(&old_ldt->ldt[0], + &new_ldt->ldt[old_ldt->start - begin_sel], + old_ldt->count * sizeof(struct real_descriptor)); + + /* + * If the old and new LDTs are non-overlapping, fill the + * center in with null selectors. + */ + + if (old_ldt->start + old_ldt->count < start_sel) + bzero(&new_ldt->ldt[old_ldt->count], + (start_sel - (old_ldt->start + old_ldt->count)) * sizeof(struct real_descriptor)); + else if (old_ldt->start > start_sel + num_sels) + bzero(&new_ldt->ldt[num_sels], + (old_ldt->start - (start_sel + num_sels)) * sizeof(struct real_descriptor)); } - else if (thr_act == current_thread()) { - struct real_descriptor template = {0, 0, 0, ACC_P, 0, 0 ,0}; - - for (dp = &new_ldt->ldt[0], i = 0; i < first_desc; i++, dp++) { - if (i < LDTSZ) - *dp = *(struct real_descriptor *) &ldt[i]; - else - *dp = template; + + /* + * Install new descriptors. + */ + if (descs != 0) { + err = copyin(descs, (char *)&new_ldt->ldt[start_sel - begin_sel], + num_sels * sizeof(struct real_descriptor)); + if (err != 0) + { + task_unlock(task); + user_ldt_free(new_ldt); + return err; } + } else { + bzero(&new_ldt->ldt[start_sel - begin_sel], num_sels * sizeof(struct real_descriptor)); } - temp = old_ldt; - old_ldt = new_ldt; /* use new LDT from now on */ - new_ldt = temp; /* discard old LDT */ - - pcb->ims.ldt = old_ldt; /* new LDT for thread */ + /* + * Validate descriptors. + * Only allow descriptors with user privileges. 
+ */ + for (i = 0, dp = (struct real_descriptor *) &new_ldt->ldt[start_sel - begin_sel]; + i < num_sels; + i++, dp++) + { + switch (dp->access & ~ACC_A) { + case 0: + case ACC_P: + /* valid empty descriptor */ + break; + case ACC_P | ACC_PL_U | ACC_DATA: + case ACC_P | ACC_PL_U | ACC_DATA_W: + case ACC_P | ACC_PL_U | ACC_DATA_E: + case ACC_P | ACC_PL_U | ACC_DATA_EW: + case ACC_P | ACC_PL_U | ACC_CODE: + case ACC_P | ACC_PL_U | ACC_CODE_R: + case ACC_P | ACC_PL_U | ACC_CODE_C: + case ACC_P | ACC_PL_U | ACC_CODE_CR: + case ACC_P | ACC_PL_U | ACC_CALL_GATE_16: + case ACC_P | ACC_PL_U | ACC_CALL_GATE: + break; + default: + task_unlock(task); + user_ldt_free(new_ldt); + return EACCES; + } + } } + task->i386_ldt = new_ldt; /* new LDT for task */ + /* - * Install new descriptors. + * Switch to new LDT. We need to do this on all CPUs, since + * another thread in this same task may be currently running, + * and we need to make sure the new LDT is in place + * throughout the task before returning to the user. */ - bcopy((char *)desc_list, - (char *)&old_ldt->ldt[first_desc], - count * sizeof(struct real_descriptor)); + mp_rendezvous_no_intrs(user_ldt_set_action, task); - simple_unlock(&pcb->lock); + task_unlock(task); - if (new_ldt) - kfree((vm_offset_t)new_ldt, - new_ldt->desc.limit_low+1+sizeof(struct real_descriptor)); - - /* - * Free the descriptor list. + /* free old LDT. We can't do this until after we've + * rendezvoused with all CPUs, in case another thread + * in this task was in the process of context switching. 
*/ - (void) vm_map_remove(ipc_kernel_map, vm_map_trunc_page(desc_list), - vm_map_round_page(&desc_list[count]), - VM_MAP_REMOVE_KUNWIRE); - return KERN_SUCCESS; + if (old_ldt) + user_ldt_free(old_ldt); + + *retval = start_sel; + + return 0; } -kern_return_t +int i386_get_ldt( - thread_t thr_act, - int first_selector, - int selector_count, /* number wanted */ - descriptor_list_t *desc_list, /* in/out */ - mach_msg_type_number_t *count) /* in/out */ + int *retval, + uint32_t start_sel, + uint32_t descs, /* out */ + uint32_t num_sels) { - struct user_ldt *user_ldt; - pcb_t pcb = thr_act->machine.pcb; - int first_desc = sel_idx(first_selector); + user_ldt_t user_ldt; + task_t task = current_task(); unsigned int ldt_count; - vm_size_t ldt_size; - vm_size_t size, size_needed; - vm_offset_t addr; - thread_t thread; - - if (thr_act == THREAD_NULL) - return KERN_INVALID_ARGUMENT; - - if (first_desc < 0 || first_desc > 8191) - return KERN_INVALID_ARGUMENT; - if (first_desc + selector_count >= 8192) - return KERN_INVALID_ARGUMENT; - - addr = 0; - size = 0; - - for (;;) { - simple_lock(&pcb->lock); - user_ldt = pcb->ims.ldt; - if (user_ldt == 0) { - simple_unlock(&pcb->lock); - if (addr) - kmem_free(ipc_kernel_map, addr, size); - *count = 0; - return KERN_SUCCESS; - } + kern_return_t err; - /* - * Find how many descriptors we should return. - */ - ldt_count = (user_ldt->desc.limit_low + 1) / - sizeof (struct real_descriptor); - ldt_count -= first_desc; - if (ldt_count > selector_count) - ldt_count = selector_count; + if (start_sel >= 8192) + return EINVAL; + if (num_sels > 8192 - start_sel) /* start_sel < 8192 here; avoids uint32_t wraparound of the sum */ + return EINVAL; + if (descs == 0) + return EINVAL; - ldt_size = ldt_count * sizeof(struct real_descriptor); + task_lock(task); - /* - * Do we have the memory we need? 
- */ - if (ldt_count <= *count) - break; /* fits in-line */ + user_ldt = task->i386_ldt; + err = 0; - size_needed = round_page(ldt_size); - if (size_needed <= size) - break; + /* + * copy out the descriptors + */ + + if (user_ldt != 0) + ldt_count = user_ldt->start + user_ldt->count; + else + ldt_count = LDTSZ_MIN; - /* - * Unlock the pcb and allocate more memory - */ - simple_unlock(&pcb->lock); - if (size != 0) - kmem_free(ipc_kernel_map, addr, size); + if (start_sel < ldt_count) + { + unsigned int copy_sels = num_sels; - size = size_needed; + if (start_sel + num_sels > ldt_count) + copy_sels = ldt_count - start_sel; - if (kmem_alloc(ipc_kernel_map, &addr, size) - != KERN_SUCCESS) - return KERN_RESOURCE_SHORTAGE; + err = copyout((char *)(current_ldt() + start_sel), + descs, copy_sels * sizeof(struct real_descriptor)); } - /* - * copy out the descriptors - */ - bcopy((char *)&user_ldt->ldt[first_desc], - (char *)addr, - ldt_size); - *count = ldt_count; - simple_unlock(&pcb->lock); + task_unlock(task); - if (addr) { - vm_size_t size_used, size_left; - vm_map_copy_t memory; + *retval = ldt_count; - /* - * Free any unused memory beyond the end of the last page used - */ - size_used = round_page(ldt_size); - if (size_used != size) - kmem_free(ipc_kernel_map, - addr + size_used, size - size_used); + return err; +} - /* - * Zero the remainder of the page being returned. - */ - size_left = size_used - ldt_size; - if (size_left > 0) - bzero((char *)addr + ldt_size, size_left); +void +user_ldt_free( + user_ldt_t user_ldt) +{ + kfree(user_ldt, sizeof(struct user_ldt) + (user_ldt->count * sizeof(struct real_descriptor))); +} - /* - * Unwire the memory and make it into copyin form. 
- */ - (void) vm_map_unwire(ipc_kernel_map, vm_map_trunc_page(addr), - vm_map_round_page(addr + size_used), FALSE); - (void) vm_map_copyin(ipc_kernel_map, (vm_map_address_t)addr, - (vm_map_size_t)size_used, TRUE, &memory); - *desc_list = (descriptor_list_t) memory; +user_ldt_t +user_ldt_copy( + user_ldt_t user_ldt) +{ + if (user_ldt != NULL) { + size_t size = sizeof(struct user_ldt) + (user_ldt->count * sizeof(struct real_descriptor)); + user_ldt_t new_ldt = (user_ldt_t)kalloc(size); + if (new_ldt != NULL) + bcopy(user_ldt, new_ldt, size); + return new_ldt; } + + return 0; +} - return KERN_SUCCESS; +void +user_ldt_set_action( + void *arg) +{ + task_t arg_task = (task_t)arg; + + if (arg_task == current_task()) { + user_ldt_set(current_thread()); + } } -#endif +/* + * Set the LDT for the given thread on the current CPU. Should be invoked + * with interrupts disabled. + */ void -user_ldt_free( - user_ldt_t user_ldt) +user_ldt_set( + thread_t thread) { - kfree(user_ldt, - user_ldt->desc.limit_low+1+sizeof(struct real_descriptor)); + task_t task = thread->task; + user_ldt_t user_ldt; + + user_ldt = task->i386_ldt; + + if (user_ldt != 0) { + struct real_descriptor *ldtp = (struct real_descriptor *)current_ldt(); + + if (user_ldt->start > LDTSZ_MIN) { + bzero(&ldtp[LDTSZ_MIN], + sizeof(struct real_descriptor) * (user_ldt->start - LDTSZ_MIN)); + } + + bcopy(user_ldt->ldt, &ldtp[user_ldt->start], + sizeof(struct real_descriptor) * (user_ldt->count)); + + gdt_desc_p(USER_LDT)->limit_low = (sizeof(struct real_descriptor) * (user_ldt->start + user_ldt->count)) - 1; + + ml_cpu_set_ldt(USER_LDT); + } else { + ml_cpu_set_ldt(KERNEL_LDT); + } } diff --git a/osfmk/i386/user_ldt.h b/osfmk/i386/user_ldt.h index db0b78e4d..e11cd3072 100644 --- a/osfmk/i386/user_ldt.h +++ b/osfmk/i386/user_ldt.h @@ -57,29 +57,36 @@ /* * User LDT management. * - * Each thread in a task may have its own LDT. + * Each task may have its own LDT. 
*/ +#define LDT_AUTO_ALLOC 0xffffffff + +#ifdef KERNEL #include struct user_ldt { - struct real_descriptor desc; /* descriptor for self */ - struct real_descriptor ldt[1]; /* descriptor table (variable) */ + unsigned int start; /* first descriptor in table */ + unsigned int count; /* how many descriptors in table */ + struct real_descriptor ldt[0]; /* descriptor table (variable) */ }; typedef struct user_ldt * user_ldt_t; -/* - * Check code/stack/data selector values against LDT if present. - */ -#define S_CODE 0 /* code segment */ -#define S_STACK 1 /* stack segment */ -#define S_DATA 2 /* data segment */ - -extern boolean_t selector_check( - thread_t thread, - int sel, - int type); +extern user_ldt_t user_ldt_copy( + user_ldt_t uldt); extern void user_ldt_free( user_ldt_t uldt); +extern void user_ldt_set( + thread_t thread); +#else /* !KERNEL */ +#include + +union ldt_entry; + +__BEGIN_DECLS +int i386_get_ldt(int, union ldt_entry *, int); +int i386_set_ldt(int, const union ldt_entry *, int); +__END_DECLS +#endif /* KERNEL */ #endif /* _I386_USER_LDT_H_ */ diff --git a/osfmk/ipc/ipc_object.c b/osfmk/ipc/ipc_object.c index 0874e7a31..c5b03d101 100644 --- a/osfmk/ipc/ipc_object.c +++ b/osfmk/ipc/ipc_object.c @@ -1041,10 +1041,13 @@ const char *ikot_print_array[IKOT_MAX_TYPE] = { "(IOKIT_CONNECT) ", "(IOKIT_OBJECT) ", /* 30 */ "(UPL) ", - /* << new entries here */ + "(MEM_OBJ_CONTROL) ", +/* + * Add new entries here. 
+ * Please keep in sync with kern/ipc_kobject.h + */ "(UNKNOWN) " /* magic catchall */ }; -/* Please keep in sync with kern/ipc_kobject.h */ void ipc_object_print( diff --git a/osfmk/ipc/ipc_port.c b/osfmk/ipc/ipc_port.c index 8c5d894b5..99a43f5bf 100644 --- a/osfmk/ipc/ipc_port.c +++ b/osfmk/ipc/ipc_port.c @@ -1500,15 +1500,32 @@ print_ports(void) PRINT_ONE_PORT_TYPE(PROCESSOR); PRINT_ONE_PORT_TYPE(PSET); PRINT_ONE_PORT_TYPE(PSET_NAME); + PRINT_ONE_PORT_TYPE(TIMER); PRINT_ONE_PORT_TYPE(PAGING_REQUEST); - PRINT_ONE_PORT_TYPE(MEMORY_OBJECT); PRINT_ONE_PORT_TYPE(MIG); + PRINT_ONE_PORT_TYPE(MEMORY_OBJECT); PRINT_ONE_PORT_TYPE(XMM_PAGER); PRINT_ONE_PORT_TYPE(XMM_KERNEL); PRINT_ONE_PORT_TYPE(XMM_REPLY); + PRINT_ONE_PORT_TYPE(UND_REPLY); + PRINT_ONE_PORT_TYPE(HOST_NOTIFY); + PRINT_ONE_PORT_TYPE(HOST_SECURITY); + PRINT_ONE_PORT_TYPE(LEDGER); + PRINT_ONE_PORT_TYPE(MASTER_DEVICE); + PRINT_ONE_PORT_TYPE(TASK_NAME); + PRINT_ONE_PORT_TYPE(SUBSYSTEM); + PRINT_ONE_PORT_TYPE(IO_DONE_QUEUE); + PRINT_ONE_PORT_TYPE(SEMAPHORE); + PRINT_ONE_PORT_TYPE(LOCK_SET); PRINT_ONE_PORT_TYPE(CLOCK); PRINT_ONE_PORT_TYPE(CLOCK_CTRL); - PRINT_ONE_PORT_TYPE(MASTER_DEVICE); + PRINT_ONE_PORT_TYPE(IOKIT_SPARE); + PRINT_ONE_PORT_TYPE(NAMED_ENTRY); + PRINT_ONE_PORT_TYPE(IOKIT_CONNECT); + PRINT_ONE_PORT_TYPE(IOKIT_OBJECT); + PRINT_ONE_PORT_TYPE(UPL); + PRINT_ONE_PORT_TYPE(MEM_OBJ_CONTROL); + PRINT_ONE_PORT_TYPE(UNKNOWN); printf("\nipc_space:\n\n"); printf("NULL KERNEL REPLY PAGER OTHER\n"); diff --git a/osfmk/ipc/mach_msg.c b/osfmk/ipc/mach_msg.c index f532b5230..7600f56ec 100644 --- a/osfmk/ipc/mach_msg.c +++ b/osfmk/ipc/mach_msg.c @@ -582,6 +582,7 @@ mach_msg_overwrite_trap( mach_port_name_t notify = args->notify; mach_vm_address_t rcv_msg_addr = args->rcv_msg; mach_msg_size_t scatter_list_size = 0; /* NOT INITIALIZED - but not used in pactice */ + mach_port_seqno_t temp_seqno = 0; register mach_msg_header_t *hdr; mach_msg_return_t mr = MACH_MSG_SUCCESS; @@ -1488,7 +1489,6 @@ mach_msg_overwrite_trap( 
slow_copyin: { - mach_port_seqno_t temp_seqno = 0; register mach_port_name_t reply_name = (mach_port_name_t)hdr->msgh_local_port; @@ -1581,7 +1581,6 @@ mach_msg_overwrite_trap( { register ipc_port_t reply_port; - mach_port_seqno_t local_seqno; spl_t s; /* @@ -1658,7 +1657,7 @@ mach_msg_overwrite_trap( * no threads blocked waiting to send. */ dest_port = reply_port; - local_seqno = rcv_mqueue->imq_seqno++; + temp_seqno = rcv_mqueue->imq_seqno++; imq_unlock(rcv_mqueue); splx(s); @@ -1670,7 +1669,7 @@ mach_msg_overwrite_trap( ip_check_unlock(reply_port); if (option & MACH_RCV_TRAILER_MASK) { - trailer->msgh_seqno = local_seqno; + trailer->msgh_seqno = temp_seqno; trailer->msgh_trailer_size = REQUESTED_TRAILER_SIZE(option); } /* copy out the kernel reply */ @@ -1755,6 +1754,7 @@ mach_msg_overwrite_trap( /* LP64support - have to compute real size as it would be received */ reply_size = ipc_kmsg_copyout_size(kmsg, current_map()) + REQUESTED_TRAILER_SIZE(option); + temp_seqno = trailer->msgh_seqno; if (rcv_size < reply_size) { if (msg_receive_error(kmsg, msg_addr, option, temp_seqno, space) == MACH_RCV_INVALID_DATA) { diff --git a/osfmk/kdp/kdp.c b/osfmk/kdp/kdp.c index 695b356c6..a0f3d496e 100644 --- a/osfmk/kdp/kdp.c +++ b/osfmk/kdp/kdp.c @@ -30,6 +30,11 @@ #include /* bcopy */ +#include +#include +#include +#include + int kdp_vm_read( caddr_t, caddr_t, unsigned int); int kdp_vm_write( caddr_t, caddr_t, unsigned int); @@ -89,6 +94,45 @@ static unsigned int breakpoints_initialized = 0; int reattach_wait = 0; int noresume_on_disconnect = 0; +#define MAXCOMLEN 16 + +struct thread_snapshot { + uint32_t snapshot_magic; + thread_t thread_id; + int32_t state; + wait_queue_t wait_queue; + event64_t wait_event; + vm_offset_t kernel_stack; + vm_offset_t reserved_stack; + thread_continue_t continuation; + uint32_t nkern_frames; + char user64_p; + uint32_t nuser_frames; + int32_t pid; + char p_comm[MAXCOMLEN + 1]; +}; + +typedef struct thread_snapshot *thread_snapshot_t; + +extern 
int +machine_trace_thread(thread_t thread, uint32_t tracepos, uint32_t tracebound, int nframes, boolean_t user_p); +extern int +machine_trace_thread64(thread_t thread, uint32_t tracepos, uint32_t tracebound, int nframes, boolean_t user_p); +extern int +proc_pid(void *p); +extern void +proc_name(int pid, char *buf, int size); +extern void +kdp_snapshot_postflight(void); + +static int +pid_from_task(task_t task); + +int +kdp_stackshot(int pid, uint32_t tracebuf, uint32_t tracebuf_size, unsigned trace_options, uint32_t *pbytesTraced); + +extern char version[]; + boolean_t kdp_packet( unsigned char *pkt, @@ -374,7 +418,9 @@ kdp_readmem( int plen = *len; kdp_readmem_reply_t *rp = &pkt->readmem_reply; int cnt; - +#if __i386__ + void *pversion = &version; +#endif if (plen < sizeof (*rq)) return (FALSE); @@ -387,7 +433,17 @@ kdp_readmem( unsigned int n = rq->nbytes; dprintf(("kdp_readmem addr %x size %d\n", rq->address, rq->nbytes)); - +#if __i386__ + /* XXX This is a hack to facilitate the "showversion" macro + * on i386, which is used to obtain the kernel version without + * symbols - a pointer to the version string should eventually + * be pinned at a fixed address when an equivalent of the + * VECTORS segment (loaded at a fixed load address, and contains + * a table) is implemented on x86, as with PPC. 
+ */ + if (rq->address == (void *)0x501C) + rq->address = &pversion; +#endif cnt = kdp_vm_read((caddr_t)rq->address, (caddr_t)rp->data, rq->nbytes); rp->error = KDPERR_NO_ERROR; @@ -676,3 +732,97 @@ kdp_remove_all_breakpoints() } return breakpoint_found; } + + +#define MAX_FRAMES 1000 + +static int pid_from_task(task_t task) +{ + int pid = -1; + + if (task->bsd_info) + pid = proc_pid(task->bsd_info); + + return pid; +} + +int +kdp_stackshot(int pid, uint32_t tracebuf, uint32_t tracebuf_size, unsigned trace_options, uint32_t *pbytesTraced) +{ + uint32_t tracepos = (uint32_t) tracebuf; + uint32_t tracebound = tracepos + tracebuf_size; + uint32_t tracebytes = 0; + int error = 0; + + processor_set_t pset = &default_pset; + task_t task = TASK_NULL; + thread_t thread = THREAD_NULL; + int nframes = trace_options; + thread_snapshot_t tsnap = NULL; + unsigned framesize = 2 * sizeof(vm_offset_t); + + if ((nframes <= 0) || nframes > MAX_FRAMES) + nframes = MAX_FRAMES; + + queue_iterate(&pset->tasks, task, task_t, pset_tasks) { + /* Trace everything, unless a process was specified */ + if ((pid == -1) || (pid == pid_from_task(task))) + queue_iterate(&task->threads, thread, thread_t, task_threads){ + if ((tracepos + 4 * sizeof(struct thread_snapshot)) > tracebound) { + error = -1; + goto error_exit; + } +/* Populate the thread snapshot header */ + tsnap = (thread_snapshot_t) tracepos; + tsnap->thread_id = thread; + tsnap->state = thread->state; + tsnap->wait_queue = thread->wait_queue; + tsnap->wait_event = thread->wait_event; + tsnap->kernel_stack = thread->kernel_stack; + tsnap->reserved_stack = thread->reserved_stack; + tsnap->continuation = thread->continuation; +/* Add the BSD process identifiers */ + if ((tsnap->pid = pid_from_task(task)) != -1) + proc_name(tsnap->pid, tsnap->p_comm, MAXCOMLEN + 1); + else + tsnap->p_comm[0] = '\0'; + + tsnap->snapshot_magic = 0xfeedface; + tracepos += sizeof(struct thread_snapshot); + +/* Call through to the machine specific trace 
routines + * Frames are added past the snapshot header. + */ + if (tsnap->kernel_stack != 0) + tracebytes = machine_trace_thread(thread, tracepos, tracebound, nframes, FALSE); + tsnap->nkern_frames = tracebytes/(2 * sizeof(vm_offset_t)); + tracepos += tracebytes; + tracebytes = 0; + tsnap->user64_p = 0; +/* Trace user stack, if any */ + if (thread->task->map != kernel_map) { +/* 64-bit task? */ + if (task_has_64BitAddr(thread->task)) { + tracebytes = machine_trace_thread64(thread, tracepos, tracebound, nframes, TRUE); + tsnap->user64_p = 1; + framesize = 2 * sizeof(addr64_t); + } + else { + tracebytes = machine_trace_thread(thread, tracepos, tracebound, nframes, TRUE); + framesize = 2 * sizeof(vm_offset_t); + } + } + tsnap->nuser_frames = tracebytes/framesize; + tracepos += tracebytes; + tracebytes = 0; + } + } + +error_exit: + /* Release stack snapshot wait indicator */ + kdp_snapshot_postflight(); + + *pbytesTraced = tracepos - tracebuf; + + return error; +} diff --git a/osfmk/kdp/kdp_core.h b/osfmk/kdp/kdp_core.h index a716146be..801a1c7af 100644 --- a/osfmk/kdp/kdp_core.h +++ b/osfmk/kdp/kdp_core.h @@ -73,4 +73,8 @@ void abort_panic_transfer (void); struct corehdr *create_panic_header(unsigned int request, const char *corename, unsigned length, unsigned block); -int kdp_send_panic_pkt (unsigned int request, char *corename, unsigned int length, void *panic_data); +int kdp_send_crashdump_pkt(unsigned int request, char *corename, + unsigned int length, void *panic_data); + +int kdp_send_crashdump_data(unsigned int request, char *corename, + unsigned int length, caddr_t txstart); diff --git a/osfmk/kdp/kdp_internal.h b/osfmk/kdp/kdp_internal.h index 37eccf3ab..c513c91da 100644 --- a/osfmk/kdp/kdp_internal.h +++ b/osfmk/kdp/kdp_internal.h @@ -40,7 +40,8 @@ typedef struct { extern kdp_glob_t kdp; -extern int kdp_flag; +extern volatile int kdp_flag; + #define KDP_READY 0x1 #define KDP_ARP 0x2 #define KDP_BP_DIS 0x4 diff --git a/osfmk/kdp/kdp_udp.c 
b/osfmk/kdp/kdp_udp.c index 903054c96..f10b5e060 100644 --- a/osfmk/kdp/kdp_udp.c +++ b/osfmk/kdp/kdp_udp.c @@ -35,14 +35,14 @@ #include #include +#include #include #include #include -#include - #include #include +#include /* kernel_map */ #include @@ -82,7 +82,7 @@ struct { } rmt; } adr; -static char +static const char *exception_message[] = { "Unknown", "Memory access", /* EXC_BAD_ACCESS */ @@ -93,7 +93,7 @@ static char "Breakpoint" /* EXC_BREAKPOINT */ }; -int kdp_flag = 0; +volatile int kdp_flag = 0; static kdp_send_t kdp_en_send_pkt = 0; static kdp_receive_t kdp_en_recv_pkt = 0; @@ -105,23 +105,34 @@ static void *kdp_current_ifp = 0; static void kdp_handler( void *); -static unsigned int panic_server_ip = 0; -static unsigned int parsed_router_ip = 0; -static unsigned int router_ip = 0; -static unsigned int panicd_specified = 0; -static unsigned int router_specified = 0; +static uint32_t panic_server_ip = 0; +static uint32_t parsed_router_ip = 0; +static uint32_t router_ip = 0; +static uint32_t target_ip = 0; + +static volatile boolean_t panicd_specified = FALSE; +static boolean_t router_specified = FALSE; +static unsigned int panicd_port = CORE_REMOTE_PORT; + +/* As in bsd/net/ether_if_module.c */ +static struct ether_addr etherbroadcastaddr = {{0xff, 0xff, 0xff, 0xff, 0xff, 0xff}}; static struct ether_addr router_mac = {{0, 0, 0 , 0, 0, 0}}; +static struct ether_addr destination_mac = {{0, 0, 0 , 0, 0, 0}}; +static struct ether_addr temp_mac = {{0, 0, 0 , 0, 0, 0}}; +static struct ether_addr current_resolved_MAC = {{0, 0, 0 , 0, 0, 0}}; -static u_char flag_panic_dump_in_progress = 0; -static u_char flag_router_mac_initialized = 0; +static boolean_t flag_panic_dump_in_progress = FALSE; +static boolean_t flag_router_mac_initialized = FALSE; + +static boolean_t flag_arp_resolved = FALSE; static unsigned int panic_timeout = 100000; static unsigned int last_panic_port = CORE_REMOTE_PORT; unsigned int SEGSIZE = 512; -static unsigned int PANIC_PKTSIZE = 518; 
+__unused static unsigned int PANIC_PKTSIZE = 518; static char panicd_ip_str[20]; static char router_ip_str[20]; @@ -131,7 +142,52 @@ static volatile unsigned int flag_kdp_trigger_reboot = 0; extern unsigned int not_in_kdp; -extern int kdp_vm_read( caddr_t, caddr_t, unsigned int); +extern unsigned long panic_caller; +extern unsigned int disableConsoleOutput; + +extern int kdp_vm_read( caddr_t, caddr_t, unsigned int); +extern void kdp_call(void); +extern boolean_t kdp_call_kdb(void); +extern int kern_dump(void); + +void * kdp_get_interface(void); +void kdp_set_gateway_mac(void *); +void kdp_set_ip_and_mac_addresses(struct in_addr *, struct ether_addr *); +void kdp_set_interface(void *); + +void kdp_disable_arp(void); +static void kdp_arp_reply(struct ether_arp *); +static void kdp_process_arp_reply(struct ether_arp *); +static boolean_t kdp_arp_resolve(uint32_t, struct ether_addr *); + +static boolean_t gKDPDebug = FALSE; +#define KDP_DEBUG(...) if (gKDPDebug) printf(__VA_ARGS__); + +int kdp_snapshot = 0; +static int stack_snapshot_ret = 0; +static unsigned stack_snapshot_bytes_traced = 0; + +static void *stack_snapshot_buf; +static uint32_t stack_snapshot_bufsize; +static int stack_snapshot_pid; +static uint32_t stack_snapshot_options; + +void +kdp_snapshot_preflight(int pid, void * tracebuf, uint32_t tracebuf_size, + uint32_t options); + +void +kdp_snapshot_postflight(void); + +extern int +kdp_stackshot(int pid, uint32_t tracebuf, uint32_t tracebuf_size, + unsigned trace_options, uint32_t *pbytesTraced); + +int +kdp_stack_snapshot_geterror(void); + +int +kdp_stack_snapshot_bytes_traced(void); void kdp_register_send_receive( @@ -166,10 +222,13 @@ kdp_register_send_receive( kdp_flag |= PANIC_LOG_DUMP; if (PE_parse_boot_arg ("_panicd_ip", panicd_ip_str)) - panicd_specified = 1; - /* For the future, currently non-functional */ + panicd_specified = TRUE; + if (PE_parse_boot_arg ("_router_ip", router_ip_str)) - router_specified = 1; + router_specified = TRUE; + + if 
(!PE_parse_boot_arg ("panicd_port", &panicd_port)) + panicd_port = CORE_REMOTE_PORT; kdp_flag |= KDP_READY; if (current_debugger == NO_CUR_DB) @@ -182,8 +241,8 @@ kdp_register_send_receive( void kdp_unregister_send_receive( - kdp_send_t send, - kdp_receive_t receive) + __unused kdp_send_t send, + __unused kdp_receive_t receive) { if (current_debugger == KDP_CUR_DB) current_debugger = NO_CUR_DB; @@ -192,6 +251,35 @@ kdp_unregister_send_receive( kdp_en_recv_pkt = NULL; } +/* Cache stack snapshot parameters in preparation for a trace */ +void +kdp_snapshot_preflight(int pid, void * tracebuf, uint32_t tracebuf_size, uint32_t options) +{ + stack_snapshot_pid = pid; + stack_snapshot_buf = tracebuf; + stack_snapshot_bufsize = tracebuf_size; + stack_snapshot_options = options; + kdp_snapshot++; +} + +void +kdp_snapshot_postflight(void) +{ + kdp_snapshot--; +} + +int +kdp_stack_snapshot_geterror(void) +{ + return stack_snapshot_ret; +} + +int +kdp_stack_snapshot_bytes_traced(void) +{ + return stack_snapshot_bytes_traced; +} + static void enaddr_copy( void *src, @@ -205,92 +293,92 @@ static unsigned short ip_sum( unsigned char *c, unsigned int hlen -) + ) { - unsigned int high, low, sum; + unsigned int high, low, sum; - high = low = 0; - while (hlen-- > 0) { - low += c[1] + c[3]; - high += c[0] + c[2]; + high = low = 0; + while (hlen-- > 0) { + low += c[1] + c[3]; + high += c[0] + c[2]; - c += sizeof (int); - } + c += sizeof (int); + } - sum = (high << 8) + low; - sum = (sum >> 16) + (sum & 65535); + sum = (high << 8) + low; + sum = (sum >> 16) + (sum & 65535); - return (sum > 65535 ? sum - 65535 : sum); + return (sum > 65535 ? 
sum - 65535 : sum); } static void kdp_reply( unsigned short reply_port -) + ) { - struct udpiphdr aligned_ui, *ui = &aligned_ui; - struct ip aligned_ip, *ip = &aligned_ip; - struct in_addr tmp_ipaddr; - struct ether_addr tmp_enaddr; - struct ether_header *eh; + struct udpiphdr aligned_ui, *ui = &aligned_ui; + struct ip aligned_ip, *ip = &aligned_ip; + struct in_addr tmp_ipaddr; + struct ether_addr tmp_enaddr; + struct ether_header *eh = NULL; - if (!pkt.input) - kdp_panic("kdp_reply"); + if (!pkt.input) + kdp_panic("kdp_reply"); - pkt.off -= sizeof (struct udpiphdr); + pkt.off -= sizeof (struct udpiphdr); #if DO_ALIGN - bcopy((char *)&pkt.data[pkt.off], (char *)ui, sizeof(*ui)); + bcopy((char *)&pkt.data[pkt.off], (char *)ui, sizeof(*ui)); #else - ui = (struct udpiphdr *)&pkt.data[pkt.off]; + ui = (struct udpiphdr *)&pkt.data[pkt.off]; #endif - ui->ui_next = ui->ui_prev = 0; - ui->ui_x1 = 0; - ui->ui_pr = IPPROTO_UDP; - ui->ui_len = htons((u_short)pkt.len + sizeof (struct udphdr)); - tmp_ipaddr = ui->ui_src; - ui->ui_src = ui->ui_dst; - ui->ui_dst = tmp_ipaddr; - ui->ui_sport = htons(KDP_REMOTE_PORT); - ui->ui_dport = reply_port; - ui->ui_ulen = ui->ui_len; - ui->ui_sum = 0; + ui->ui_next = ui->ui_prev = 0; + ui->ui_x1 = 0; + ui->ui_pr = IPPROTO_UDP; + ui->ui_len = htons((u_short)pkt.len + sizeof (struct udphdr)); + tmp_ipaddr = ui->ui_src; + ui->ui_src = ui->ui_dst; + ui->ui_dst = tmp_ipaddr; + ui->ui_sport = htons(KDP_REMOTE_PORT); + ui->ui_dport = reply_port; + ui->ui_ulen = ui->ui_len; + ui->ui_sum = 0; #if DO_ALIGN - bcopy((char *)ui, (char *)&pkt.data[pkt.off], sizeof(*ui)); - bcopy((char *)&pkt.data[pkt.off], (char *)ip, sizeof(*ip)); + bcopy((char *)ui, (char *)&pkt.data[pkt.off], sizeof(*ui)); + bcopy((char *)&pkt.data[pkt.off], (char *)ip, sizeof(*ip)); #else - ip = (struct ip *)&pkt.data[pkt.off]; + ip = (struct ip *)&pkt.data[pkt.off]; #endif - ip->ip_len = htons(sizeof (struct udpiphdr) + pkt.len); - ip->ip_v = IPVERSION; - ip->ip_id = htons(ip_id++); 
- ip->ip_hl = sizeof (struct ip) >> 2; - ip->ip_ttl = udp_ttl; - ip->ip_sum = 0; - ip->ip_sum = htons(~ip_sum((unsigned char *)ip, ip->ip_hl)); + ip->ip_len = htons(sizeof (struct udpiphdr) + pkt.len); + ip->ip_v = IPVERSION; + ip->ip_id = htons(ip_id++); + ip->ip_hl = sizeof (struct ip) >> 2; + ip->ip_ttl = udp_ttl; + ip->ip_sum = 0; + ip->ip_sum = htons(~ip_sum((unsigned char *)ip, ip->ip_hl)); #if DO_ALIGN - bcopy((char *)ip, (char *)&pkt.data[pkt.off], sizeof(*ip)); + bcopy((char *)ip, (char *)&pkt.data[pkt.off], sizeof(*ip)); #endif - pkt.len += sizeof (struct udpiphdr); + pkt.len += sizeof (struct udpiphdr); - pkt.off -= sizeof (struct ether_header); + pkt.off -= sizeof (struct ether_header); - eh = (struct ether_header *)&pkt.data[pkt.off]; - enaddr_copy(eh->ether_shost, &tmp_enaddr); - enaddr_copy(eh->ether_dhost, eh->ether_shost); - enaddr_copy(&tmp_enaddr, eh->ether_dhost); - eh->ether_type = htons(ETHERTYPE_IP); + eh = (struct ether_header *)&pkt.data[pkt.off]; + enaddr_copy(eh->ether_shost, &tmp_enaddr); + enaddr_copy(eh->ether_dhost, eh->ether_shost); + enaddr_copy(&tmp_enaddr, eh->ether_dhost); + eh->ether_type = htons(ETHERTYPE_IP); - pkt.len += sizeof (struct ether_header); + pkt.len += sizeof (struct ether_header); - // save reply for possible retransmission - bcopy((char *)&pkt, (char *)&saved_reply, sizeof(pkt)); + // save reply for possible retransmission + bcopy((char *)&pkt, (char *)&saved_reply, sizeof(pkt)); - (*kdp_en_send_pkt)(&pkt.data[pkt.off], pkt.len); + (*kdp_en_send_pkt)(&pkt.data[pkt.off], pkt.len); - // increment expected sequence number - exception_seq++; + // increment expected sequence number + exception_seq++; } static void @@ -379,7 +467,7 @@ void kdp_set_gateway_mac(void *gatewaymac) { router_mac = *(struct ether_addr *)gatewaymac; - flag_router_mac_initialized = 1; + flag_router_mac_initialized = TRUE; } struct ether_addr @@ -394,16 +482,58 @@ kdp_get_ip_address(void) return kdp_current_ip_address; } +void 
+kdp_disable_arp(void) +{ + kdp_flag &= ~(DB_ARP); +} + +static void +kdp_arp_dispatch(void) +{ + struct ether_arp aligned_ea, *ea = &aligned_ea; + unsigned arp_header_offset; + + arp_header_offset = sizeof(struct ether_header) + pkt.off; + memcpy((void *)ea, (void *)&pkt.data[arp_header_offset], sizeof(*ea)); + + switch(ntohs(ea->arp_op)) { + case ARPOP_REQUEST: + kdp_arp_reply(ea); + break; + case ARPOP_REPLY: + kdp_process_arp_reply(ea); + break; + default: + return; + } +} + +static void +kdp_process_arp_reply(struct ether_arp *ea) +{ + /* Are we interested in ARP replies? */ + if (flag_arp_resolved == TRUE) + return; + + /* Did we receive a reply from the right source? */ + if (((struct in_addr *)(ea->arp_spa))->s_addr != target_ip) + return; + + flag_arp_resolved = TRUE; + current_resolved_MAC = *(struct ether_addr *) (ea->arp_sha); + + return; +} + /* ARP responses are enabled when the DB_ARP bit of the debug boot arg - is set. A workaround if you don't want to reboot is to set - kdpDEBUGFlag &= DB_ARP when connected (but that certainly isn't a published - interface!) -*/ + * is set. 
+ */ + static void -kdp_arp_reply(void) +kdp_arp_reply(struct ether_arp *ea) { struct ether_header *eh; - struct ether_arp aligned_ea, *ea = &aligned_ea; struct in_addr isaddr, itaddr, myaddr; struct ether_addr my_enaddr; @@ -411,20 +541,25 @@ kdp_arp_reply(void) eh = (struct ether_header *)&pkt.data[pkt.off]; pkt.off += sizeof(struct ether_header); - memcpy((void *)ea, (void *)&pkt.data[pkt.off],sizeof(*ea)); - if(ntohs(ea->arp_op) != ARPOP_REQUEST) return; myaddr.s_addr = kdp_get_ip_address(); my_enaddr = kdp_get_mac_addr(); - if (!(myaddr.s_addr) || !(my_enaddr.ether_addr_octet[1])) + if ((ntohl(myaddr.s_addr) == 0) || + ((my_enaddr.ether_addr_octet[0] & 0xff) == 0 + && (my_enaddr.ether_addr_octet[1] & 0xff) == 0 + && (my_enaddr.ether_addr_octet[2] & 0xff) == 0 + && (my_enaddr.ether_addr_octet[3] & 0xff) == 0 + && (my_enaddr.ether_addr_octet[4] & 0xff) == 0 + && (my_enaddr.ether_addr_octet[5] & 0xff) == 0 + )) return; (void)memcpy((void *)&isaddr, (void *)ea->arp_spa, sizeof (isaddr)); (void)memcpy((void *)&itaddr, (void *)ea->arp_tpa, sizeof (itaddr)); - + if (itaddr.s_addr == myaddr.s_addr) { (void)memcpy((void *)ea->arp_tha, (void *)ea->arp_sha, sizeof(ea->arp_sha)); (void)memcpy((void *)ea->arp_sha, (void *)&my_enaddr, sizeof(ea->arp_sha)); @@ -447,100 +582,185 @@ kdp_arp_reply(void) static void kdp_poll(void) { - struct ether_header *eh; - struct udpiphdr aligned_ui, *ui = &aligned_ui; - struct ip aligned_ip, *ip = &aligned_ip; - static int msg_printed; - + struct ether_header *eh = NULL; + struct udpiphdr aligned_ui, *ui = &aligned_ui; + struct ip aligned_ip, *ip = &aligned_ip; + static int msg_printed; - if (pkt.input) - kdp_panic("kdp_poll"); + if (pkt.input) + kdp_panic("kdp_poll"); - if (!kdp_en_recv_pkt || !kdp_en_send_pkt) { - if( msg_printed == 0) { - msg_printed = 1; - printf("kdp_poll: no debugger device\n"); + if (!kdp_en_recv_pkt || !kdp_en_send_pkt) { + if( msg_printed == 0) { + msg_printed = 1; + printf("kdp_poll: no debugger device\n"); + } + 
return; } - return; - } - pkt.off = pkt.len = 0; - (*kdp_en_recv_pkt)(pkt.data, &pkt.len, 3/* ms */); - - if (pkt.len == 0) - return; + pkt.off = pkt.len = 0; + (*kdp_en_recv_pkt)(pkt.data, &pkt.len, 3/* ms */); - if (pkt.len >= sizeof(struct ether_header)) - { - eh = (struct ether_header *)&pkt.data[pkt.off]; - - if (kdp_flag & KDP_ARP) - { - if (ntohs(eh->ether_type) == ETHERTYPE_ARP) - { - kdp_arp_reply(); + if (pkt.len == 0) return; - } - } - } - if (pkt.len < (sizeof (struct ether_header) + sizeof (struct udpiphdr))) - return; + if (pkt.len >= sizeof(struct ether_header)) + { + eh = (struct ether_header *)&pkt.data[pkt.off]; + + if (kdp_flag & KDP_ARP) + { + if (ntohs(eh->ether_type) == ETHERTYPE_ARP) + { + kdp_arp_dispatch(); + return; + } + } + } + + if (pkt.len < (sizeof (struct ether_header) + sizeof (struct udpiphdr))) + return; - pkt.off += sizeof (struct ether_header); - if (ntohs(eh->ether_type) != ETHERTYPE_IP) { - return; - } + pkt.off += sizeof (struct ether_header); + if (ntohs(eh->ether_type) != ETHERTYPE_IP) { + return; + } #if DO_ALIGN - bcopy((char *)&pkt.data[pkt.off], (char *)ui, sizeof(*ui)); - bcopy((char *)&pkt.data[pkt.off], (char *)ip, sizeof(*ip)); + bcopy((char *)&pkt.data[pkt.off], (char *)ui, sizeof(*ui)); + bcopy((char *)&pkt.data[pkt.off], (char *)ip, sizeof(*ip)); #else - ui = (struct udpiphdr *)&pkt.data[pkt.off]; - ip = (struct ip *)&pkt.data[pkt.off]; + ui = (struct udpiphdr *)&pkt.data[pkt.off]; + ip = (struct ip *)&pkt.data[pkt.off]; #endif - pkt.off += sizeof (struct udpiphdr); - if (ui->ui_pr != IPPROTO_UDP) { - return; - } + pkt.off += sizeof (struct udpiphdr); + if (ui->ui_pr != IPPROTO_UDP) { + return; + } - if (ip->ip_hl > (sizeof (struct ip) >> 2)) { - return; - } + if (ip->ip_hl > (sizeof (struct ip) >> 2)) { + return; + } - if (ntohs(ui->ui_dport) != KDP_REMOTE_PORT) { - if (CORE_REMOTE_PORT == (ntohs(ui->ui_dport)) && - flag_panic_dump_in_progress) { - last_panic_port = ui->ui_sport; + if (ntohs(ui->ui_dport) != 
KDP_REMOTE_PORT) { + if (panicd_port == (ntohs(ui->ui_dport)) && + flag_panic_dump_in_progress) { + last_panic_port = ui->ui_sport; + } + else + return; } - else - return; - } - /* If we receive a kernel debugging packet whilst a - * core dump is in progress, abort the transfer and - * enter the debugger. - */ - else - if (flag_panic_dump_in_progress) - { - abort_panic_transfer(); - return; + /* If we receive a kernel debugging packet whilst a + * core dump is in progress, abort the transfer and + * enter the debugger. + */ + else + if (flag_panic_dump_in_progress) + { + abort_panic_transfer(); + return; + } + + if (!kdp.is_conn && !flag_panic_dump_in_progress) { + enaddr_copy(eh->ether_dhost, &adr.loc.ea); + adr.loc.in = ui->ui_dst; + + enaddr_copy(eh->ether_shost, &adr.rmt.ea); + adr.rmt.in = ui->ui_src; } - if (!kdp.is_conn && !flag_panic_dump_in_progress) { - enaddr_copy(eh->ether_dhost, &adr.loc.ea); - adr.loc.in = ui->ui_dst; + /* + * Calculate kdp packet length. + */ + pkt.len = ntohs((u_short)ui->ui_ulen) - sizeof (struct udphdr); + pkt.input = TRUE; +} - enaddr_copy(eh->ether_shost, &adr.rmt.ea); - adr.rmt.in = ui->ui_src; - } +/* Create and transmit an ARP resolution request for the target IP address. + * This is modeled on ether_inet_arp()/RFC 826. + */ - /* - * Calculate kdp packet length. 
- */ - pkt.len = ntohs((u_short)ui->ui_ulen) - sizeof (struct udphdr); - pkt.input = TRUE; +static void +transmit_ARP_request(uint32_t ip_addr) +{ + struct ether_header *eh = (struct ether_header *) &pkt.data[0]; + struct ether_arp *ea = (struct ether_arp *) &pkt.data[sizeof(struct ether_header)]; + + KDP_DEBUG("Transmitting ARP request\n"); + /* Populate the ether_header */ + eh->ether_type = htons(ETHERTYPE_ARP); + enaddr_copy(&kdp_current_mac_address, eh->ether_shost); + enaddr_copy(&etherbroadcastaddr, eh->ether_dhost); + + /* Populate the ARP header */ + ea->arp_pro = htons(ETHERTYPE_IP); + ea->arp_hln = sizeof(ea->arp_sha); + ea->arp_pln = sizeof(ea->arp_spa); + ea->arp_hrd = htons(ARPHRD_ETHER); + ea->arp_op = htons(ARPOP_REQUEST); + + /* Target fields */ + enaddr_copy(&etherbroadcastaddr, ea->arp_tha); + memcpy(ea->arp_tpa, (void *) &ip_addr, sizeof(ip_addr)); + + /* Source fields */ + enaddr_copy(&kdp_current_mac_address, ea->arp_sha); + memcpy(ea->arp_spa, (void *) &kdp_current_ip_address, sizeof(kdp_current_ip_address)); + + pkt.off = 0; + pkt.len = sizeof(struct ether_header) + sizeof(struct ether_arp); + /* Transmit */ + (*kdp_en_send_pkt)(&pkt.data[pkt.off], pkt.len); +} + +static boolean_t +kdp_arp_resolve(uint32_t arp_target_ip, struct ether_addr *resolved_MAC) +{ + int poll_count = 256; /* ~770 ms modulo broadcast/delayed traffic? 
*/ + char tretries = 0; + +#define NUM_ARP_TX_RETRIES 5 + + target_ip = arp_target_ip; + flag_arp_resolved = FALSE; + +TRANSMIT_RETRY: + pkt.off = pkt.len = 0; + + tretries++; + + if (tretries >= NUM_ARP_TX_RETRIES) { + return FALSE; + } + + KDP_DEBUG("ARP TX attempt #%d \n", tretries); + + transmit_ARP_request(arp_target_ip); + + while (!pkt.input && !flag_arp_resolved && flag_panic_dump_in_progress && --poll_count) { + kdp_poll(); + } + + if (flag_arp_resolved) { + *resolved_MAC = current_resolved_MAC; + return TRUE; + } + + if (!flag_panic_dump_in_progress || pkt.input) /* we received a debugging packet, bail*/ + { + printf("Received a debugger packet,transferring control to debugger\n"); + /* Indicate that we should wait in the debugger when we return */ + kdp_flag |= DBG_POST_CORE; + pkt.input = FALSE; + return FALSE; + } + else /* We timed out */ + if (0 == poll_count) { + poll_count = 256; + goto TRANSMIT_RETRY; + } + return FALSE; } static void @@ -551,7 +771,6 @@ kdp_handler( unsigned short reply_port; kdp_hdr_t aligned_hdr, *hdr = &aligned_hdr; - kdp.saved_state = saved_state; // see comment in kdp_raise_exception do { @@ -599,10 +818,15 @@ static void kdp_connection_wait(void) { unsigned short reply_port; - boolean_t kdp_call_kdb(); struct ether_addr kdp_mac_addr = kdp_get_mac_addr(); unsigned int ip_addr = ntohl(kdp_get_ip_address()); + /* + * Do both a printf() and a kprintf() of the MAC and IP so that + * they will print out on headless machines but not be added to + * the panic.log + */ + printf( "ethernet MAC address: %02x:%02x:%02x:%02x:%02x:%02x\n", kdp_mac_addr.ether_addr_octet[0] & 0xff, kdp_mac_addr.ether_addr_octet[1] & 0xff, @@ -611,12 +835,26 @@ kdp_connection_wait(void) kdp_mac_addr.ether_addr_octet[4] & 0xff, kdp_mac_addr.ether_addr_octet[5] & 0xff); + kprintf( "ethernet MAC address: %02x:%02x:%02x:%02x:%02x:%02x\n", + kdp_mac_addr.ether_addr_octet[0] & 0xff, + kdp_mac_addr.ether_addr_octet[1] & 0xff, + kdp_mac_addr.ether_addr_octet[2] & 
0xff, + kdp_mac_addr.ether_addr_octet[3] & 0xff, + kdp_mac_addr.ether_addr_octet[4] & 0xff, + kdp_mac_addr.ether_addr_octet[5] & 0xff); + printf( "ip address: %d.%d.%d.%d\n", (ip_addr & 0xff000000) >> 24, (ip_addr & 0xff0000) >> 16, (ip_addr & 0xff00) >> 8, (ip_addr & 0xff)); + kprintf( "ip address: %d.%d.%d.%d\n", + (ip_addr & 0xff000000) >> 24, + (ip_addr & 0xff0000) >> 16, + (ip_addr & 0xff00) >> 8, + (ip_addr & 0xff)); + printf("\nWaiting for remote debugger connection.\n"); if (reattach_wait == 0) { @@ -754,8 +992,6 @@ kdp_raise_exception( { int index; - extern unsigned int disableConsoleOutput; - disable_preemption(); if (saved_state == 0) @@ -781,11 +1017,19 @@ kdp_raise_exception( if (pkt.input) kdp_panic("kdp_raise_exception"); + /* Was a system trace requested ? */ + if (kdp_snapshot && (panicstr == ((char *) 0)) && (panic_caller == 0) && !kdp.is_conn) { + /* XXX This should be reworked to take a pointer to the buffer */ + stack_snapshot_ret = kdp_stackshot(stack_snapshot_pid, + (uint32_t) stack_snapshot_buf, stack_snapshot_bufsize, + stack_snapshot_options, &stack_snapshot_bytes_traced); + goto exit_raise_exception; + } + if (((kdp_flag & KDP_PANIC_DUMP_ENABLED) || (kdp_flag & PANIC_LOG_DUMP)) && (panicstr != (char *) 0)) { - kdp_panic_dump(); - + kdp_panic_dump(); } else if ((kdp_flag & PANIC_CORE_ON_NMI) && (panicstr == (char *) 0) && @@ -860,218 +1104,214 @@ kdp_reset(void) struct corehdr * create_panic_header(unsigned int request, const char *corename, - unsigned length, unsigned int block) + unsigned length, unsigned int block) { - struct udpiphdr aligned_ui, *ui = &aligned_ui; - struct ip aligned_ip, *ip = &aligned_ip; - struct ether_header *eh; - struct corehdr *coreh; - const char *mode = "octet"; - char modelen = strlen(mode); + struct udpiphdr aligned_ui, *ui = &aligned_ui; + struct ip aligned_ip, *ip = &aligned_ip; + struct ether_header *eh; + struct corehdr *coreh; + const char *mode = "octet"; + char modelen = strlen(mode); - pkt.off = 
sizeof (struct ether_header); - pkt.len = length + ((request == KDP_WRQ) ? modelen : 0) + - (corename ? strlen(corename): 0) + sizeof(struct corehdr); + pkt.off = sizeof (struct ether_header); + pkt.len = length + ((request == KDP_WRQ) ? modelen : 0) + + (corename ? strlen(corename): 0) + sizeof(struct corehdr); #if DO_ALIGN - bcopy((char *)&pkt.data[pkt.off], (char *)ui, sizeof(*ui)); + bcopy((char *)&pkt.data[pkt.off], (char *)ui, sizeof(*ui)); #else - ui = (struct udpiphdr *)&pkt.data[pkt.off]; + ui = (struct udpiphdr *)&pkt.data[pkt.off]; #endif - ui->ui_next = ui->ui_prev = 0; - ui->ui_x1 = 0; - ui->ui_pr = IPPROTO_UDP; - ui->ui_len = htons((u_short)pkt.len + sizeof (struct udphdr)); - ui->ui_src.s_addr = htonl(kdp_current_ip_address); - ui->ui_dst.s_addr = panic_server_ip; - ui->ui_sport = htons(CORE_REMOTE_PORT); - ui->ui_dport = ((request == KDP_WRQ) ? htons(CORE_REMOTE_PORT) : last_panic_port); - ui->ui_ulen = ui->ui_len; - ui->ui_sum = 0; + ui->ui_next = ui->ui_prev = 0; + ui->ui_x1 = 0; + ui->ui_pr = IPPROTO_UDP; + ui->ui_len = htons((u_short)pkt.len + sizeof (struct udphdr)); + ui->ui_src.s_addr = kdp_current_ip_address; + /* Already in network byte order via inet_aton() */ + ui->ui_dst.s_addr = panic_server_ip; + ui->ui_sport = htons(panicd_port); + ui->ui_dport = ((request == KDP_WRQ) ? 
htons(panicd_port) : last_panic_port); + ui->ui_ulen = ui->ui_len; + ui->ui_sum = 0; #if DO_ALIGN - bcopy((char *)ui, (char *)&pkt.data[pkt.off], sizeof(*ui)); - bcopy((char *)&pkt.data[pkt.off], (char *)ip, sizeof(*ip)); + bcopy((char *)ui, (char *)&pkt.data[pkt.off], sizeof(*ui)); + bcopy((char *)&pkt.data[pkt.off], (char *)ip, sizeof(*ip)); #else - ip = (struct ip *)&pkt.data[pkt.off]; + ip = (struct ip *)&pkt.data[pkt.off]; #endif - ip->ip_len = htons(sizeof (struct udpiphdr) + pkt.len); - ip->ip_v = IPVERSION; - ip->ip_id = htons(ip_id++); - ip->ip_hl = sizeof (struct ip) >> 2; - ip->ip_ttl = udp_ttl; - ip->ip_sum = 0; - ip->ip_sum = htons(~ip_sum((unsigned char *)ip, ip->ip_hl)); + ip->ip_len = htons(sizeof (struct udpiphdr) + pkt.len); + ip->ip_v = IPVERSION; + ip->ip_id = htons(ip_id++); + ip->ip_hl = sizeof (struct ip) >> 2; + ip->ip_ttl = udp_ttl; + ip->ip_sum = 0; + ip->ip_sum = htons(~ip_sum((unsigned char *)ip, ip->ip_hl)); #if DO_ALIGN - bcopy((char *)ip, (char *)&pkt.data[pkt.off], sizeof(*ip)); + bcopy((char *)ip, (char *)&pkt.data[pkt.off], sizeof(*ip)); #endif - pkt.len += sizeof (struct udpiphdr); + pkt.len += sizeof (struct udpiphdr); - pkt.off += sizeof (struct udpiphdr); + pkt.off += sizeof (struct udpiphdr); - coreh = (struct corehdr *) &pkt.data[pkt.off]; - coreh->th_opcode = htons((u_short)request); + coreh = (struct corehdr *) &pkt.data[pkt.off]; + coreh->th_opcode = htons((u_short)request); - if (request == KDP_WRQ) - { - register char *cp; - - cp = coreh->th_u.tu_rpl; - strcpy (cp, corename); - cp += strlen(corename); - *cp++ = '\0'; - strcpy (cp, mode); - cp+= modelen; - *cp++ = '\0'; - } - else - { - coreh->th_block = htonl((unsigned int) block); - } + if (request == KDP_WRQ) + { + register char *cp; + + cp = coreh->th_u.tu_rpl; + strcpy (cp, corename); + cp += strlen(corename); + *cp++ = '\0'; + strcpy (cp, mode); + cp+= modelen; + *cp++ = '\0'; + } + else + { + coreh->th_block = htonl((unsigned int) block); + } - pkt.off -= sizeof 
(struct udpiphdr); - pkt.off -= sizeof (struct ether_header); + pkt.off -= sizeof (struct udpiphdr); + pkt.off -= sizeof (struct ether_header); - eh = (struct ether_header *)&pkt.data[pkt.off]; - enaddr_copy(&kdp_current_mac_address, eh->ether_shost); - enaddr_copy(&router_mac, eh->ether_dhost); - eh->ether_type = htons(ETHERTYPE_IP); + eh = (struct ether_header *)&pkt.data[pkt.off]; + enaddr_copy(&kdp_current_mac_address, eh->ether_shost); + enaddr_copy(&destination_mac, eh->ether_dhost); + eh->ether_type = htons(ETHERTYPE_IP); - pkt.len += sizeof (struct ether_header); - return coreh; + pkt.len += sizeof (struct ether_header); + return coreh; } -int kdp_send_panic_packets (unsigned int request, char *corename, - unsigned int length, unsigned int txstart) +int kdp_send_crashdump_data(unsigned int request, char *corename, + unsigned int length, caddr_t txstart) { - unsigned int txend = txstart + length; - int panic_error = 0; + caddr_t txend = txstart + length; + int panic_error = 0; - if (length <= SEGSIZE) { - if ((panic_error = kdp_send_panic_pkt (request, corename, length, (caddr_t) txstart)) < 0) { - printf ("kdp_send_panic_pkt failed with error %d\n", panic_error); - return panic_error ; - } - } - else - { - while (txstart <= (txend - SEGSIZE)) { - if ((panic_error = kdp_send_panic_pkt (KDP_DATA, NULL, SEGSIZE, (caddr_t) txstart)) < 0) { - printf ("kdp_send_panic_pkt failed with error %d\n", panic_error); - return panic_error; + if (length <= SEGSIZE) { + if ((panic_error = kdp_send_crashdump_pkt(request, corename, length, (caddr_t) txstart)) < 0) { + printf ("kdp_send_crashdump_pkt failed with error %d\n", panic_error); + return panic_error ; + } } - txstart += SEGSIZE; - if (!(panic_block % 2000)) - printf("."); - } - if (txstart < txend) { - kdp_send_panic_pkt(request, corename, (txend - txstart), (caddr_t) txstart); - } - } - return 0; + else + { + while (txstart <= (txend - SEGSIZE)) { + if ((panic_error = kdp_send_crashdump_pkt(KDP_DATA, NULL, SEGSIZE, 
txstart)) < 0) { + printf ("kdp_send_crashdump_pkt failed with error %d\n", panic_error); + return panic_error; + } + txstart += SEGSIZE; + if (!(panic_block % 2000)) + printf("."); + } + if (txstart < txend) { + kdp_send_crashdump_pkt(request, corename, (txend - txstart), txstart); + } + } + return 0; } -int -kdp_send_panic_pkt (unsigned int request, char *corename, - unsigned int length, void *panic_data) +int +kdp_send_crashdump_pkt(unsigned int request, char *corename, + unsigned int length, void *panic_data) { - struct corehdr *th = NULL; - int poll_count = 2500; + struct corehdr *th = NULL; + int poll_count = 2500; - char rretries = 0, tretries = 0; - /* - extern signed long gIODebuggerSemaphore; - */ - pkt.off = pkt.len = 0; + char rretries = 0, tretries = 0; + + pkt.off = pkt.len = 0; - if (request == KDP_WRQ) /* longer timeout for initial request */ - poll_count += 1000; + if (request == KDP_WRQ) /* longer timeout for initial request */ + poll_count += 1000; TRANSMIT_RETRY: - tretries++; + tretries++; - if (tretries > 2) - printf("TX retry #%d ", tretries ); - - if (tretries >=15) { - /* This iokit layer issue can potentially - *cause a hang, uncomment to check if it's happening. - */ - /* - if (gIODebuggerSemaphore) - printf("The gIODebuggerSemaphore is raised, preventing packet transmission (2760413)\n"); - */ - - printf ("Cannot contact panic server, timing out.\n"); - return (-3); - } - - th = create_panic_header(request, corename, length, panic_block); + if (tretries >=15) { +/* The crashdump server is unreachable for some reason. This could be a network + * issue or, if we've been especially unfortunate, we've hit Radar 2760413, + * which is a long standing problem with the IOKit polled mode network driver + * shim which can prevent transmits/receives completely. 
+ */ + printf ("Cannot contact panic server, timing out.\n"); + return (-3); + } - if (request == KDP_DATA || request == KDP_SEEK) { - if (!kdp_vm_read ((caddr_t) panic_data, (caddr_t) th->th_data, length)) { - memset ((caddr_t) th->th_data, 'X', length); - } - } + if (tretries > 2) + printf("TX retry #%d ", tretries ); + + th = create_panic_header(request, corename, length, panic_block); - (*kdp_en_send_pkt)(&pkt.data[pkt.off], pkt.len); + if (request == KDP_DATA) { + if (!kdp_vm_read((caddr_t) panic_data, (caddr_t) th->th_data, length)) { + memset ((caddr_t) th->th_data, 'X', length); + } + } + else if (request == KDP_SEEK) { + *(unsigned int *) th->th_data = htonl(*(unsigned int *) panic_data); + } - /* Now we have to listen for the ACK */ - RECEIVE_RETRY: + (*kdp_en_send_pkt)(&pkt.data[pkt.off], pkt.len); - while (!pkt.input && flag_panic_dump_in_progress && poll_count) { - kdp_poll(); - poll_count--; - } + /* Listen for the ACK */ +RECEIVE_RETRY: + while (!pkt.input && flag_panic_dump_in_progress && poll_count) { + kdp_poll(); + poll_count--; + } - if (pkt.input) { + if (pkt.input) { - pkt.input = FALSE; + pkt.input = FALSE; - th = (struct corehdr *) &pkt.data[pkt.off]; - /* These will eventually have to be ntoh[ls]'ed as appropriate */ + th = (struct corehdr *) &pkt.data[pkt.off]; - if (th->th_opcode == KDP_ACK && th->th_block == panic_block) { - } - else - if (th->th_opcode == KDP_ERROR) { - printf("Panic server returned error %d, retrying\n", th->th_code); - poll_count = 1000; - goto TRANSMIT_RETRY; - } - else - if (th->th_block == (panic_block -1)) { - printf("RX retry "); - if (++rretries > 1) - goto TRANSMIT_RETRY; - else - goto RECEIVE_RETRY; + if (ntohs(th->th_opcode) == KDP_ACK && ntohl(th->th_block) == panic_block) { + } + else + if (ntohs(th->th_opcode) == KDP_ERROR) { + printf("Panic server returned error %d, retrying\n", ntohl(th->th_code)); + poll_count = 1000; + goto TRANSMIT_RETRY; + } + else + if (ntohl(th->th_block) == (panic_block - 1)) { + 
printf("RX retry "); + if (++rretries > 1) + goto TRANSMIT_RETRY; + else + goto RECEIVE_RETRY; + } } - } - else - if (!flag_panic_dump_in_progress) /* we received a debugging packet, bail*/ - { - printf("Received a debugger packet,transferring control to debugger\n"); - /* Configure that if not set ..*/ - kdp_flag |= DBG_POST_CORE; - return (-2); - } - else /* We timed out */ - if (0 == poll_count) { - poll_count = 1000; - kdp_us_spin ((tretries%4) * panic_timeout); /* capped linear backoff */ - goto TRANSMIT_RETRY; - } + else + if (!flag_panic_dump_in_progress) /* we received a debugging packet, bail*/ + { + printf("Received a debugger packet,transferring control to debugger\n"); + /* Configure that if not set ..*/ + kdp_flag |= DBG_POST_CORE; + return (-2); + } + else /* We timed out */ + if (0 == poll_count) { + poll_count = 1000; + kdp_us_spin ((tretries%4) * panic_timeout); /* capped linear backoff */ + goto TRANSMIT_RETRY; + } - panic_block++; + panic_block++; - if (request == KDP_EOF) - printf ("\nTotal number of packets transmitted: %d\n", panic_block); + if (request == KDP_EOF) + printf("\nTotal number of packets transmitted: %d\n", panic_block); - return 1; + return 1; } -/* Since we don't seem to have an isdigit() .. */ static int isdigit (char c) { @@ -1079,10 +1319,7 @@ isdigit (char c) } /* From user mode Libc - this ought to be in a library */ static char * -strnstr(s, find, slen) - const char *s; - const char *find; - size_t slen; +strnstr(char *s, const char *find, size_t slen) { char c, sc; size_t len; @@ -1102,148 +1339,172 @@ strnstr(s, find, slen) return ((char *)s); } +extern char version[]; + /* Horrid hack to extract xnu version if possible - a much cleaner approach * would be to have the integrator run a script which would copy the * xnu version into a string or an int somewhere at project submission * time - makes assumptions about sizeof(version), but will not fail if * it changes, but may be incorrect. 
*/ - +/* 2006: Incorporated a change from Darwin user P. Lovell to extract + * the minor kernel version numbers from the version string. + */ static int kdp_get_xnu_version(char *versionbuf) { - extern const char version[]; - char *versionpos; - char vstr[10]; - int retval = -1; - - strcpy(vstr, "custom"); - if (version) { - if (kdp_vm_read(version, versionbuf, 90)) { - - versionbuf[89] = '\0'; - - versionpos = strnstr(versionbuf, "xnu-", 80); - - if (versionpos) { - strncpy (vstr, versionpos, (isdigit (versionpos[7]) ? 8 : 7)); - vstr[(isdigit (versionpos[7]) ? 8 : 7)] = '\0'; - retval = 0; - } - } - } - strcpy(versionbuf, vstr); - return retval; + + char *versionpos; + char vstr[20]; + int retval = -1; + char *vptr; + + strcpy(vstr, "custom"); + if (version) { + if (kdp_vm_read(version, versionbuf, 95)) { + versionbuf[94] = '\0'; + versionpos = strnstr(versionbuf, "xnu-", 90); + if (versionpos) { + strncpy(vstr, versionpos, sizeof(vstr)); + vstr[sizeof(vstr)-1] = '\0'; + vptr = vstr + 4; /* Begin after "xnu-" */ + while (*vptr && (isdigit(*vptr) || *vptr == '.')) + vptr++; + *vptr = '\0'; + /* Remove trailing period, if any */ + if (*(--vptr) == '.') + *vptr = '\0'; + retval = 0; + } + } + } + strcpy(versionbuf, vstr); + return retval; } +extern char *inet_aton(const char *cp, struct in_addr *pin); +extern int snprintf(char *str, size_t size, const char *format, ...); + /* Primary dispatch routine for the system dump */ void kdp_panic_dump() { - char corename[50]; - char coreprefix[10]; - int panic_error; - - extern vm_map_t kernel_map; + char corename[50]; + char coreprefix[10]; + int panic_error; - extern char *inet_aton(const char *cp, struct in_addr *pin); + uint64_t abstime; + uint32_t current_ip = ntohl(kdp_current_ip_address); - uint64_t abstime; - - printf ("Entering system dump routine\n"); + if (flag_panic_dump_in_progress) { + printf("System dump aborted.\n"); + goto panic_dump_exit; + } + + printf("Entering system dump routine\n"); - if 
(!panicd_specified) { - printf ("A panic server was not specified in the boot-args, terminating kernel core dump.\n"); - goto panic_dump_exit; - } + if (!panicd_specified) { + printf("A dump server was not specified in the boot-args, terminating kernel core dump.\n"); + goto panic_dump_exit; + } + + if (current_ip == 0) { + printf("System dump failed: An IP address isn't assigned to this machine.\n"); + return; + } - flag_panic_dump_in_progress = 1; - not_in_kdp = 0; + flag_panic_dump_in_progress = TRUE; + not_in_kdp = 0; - if (pkt.input) - kdp_panic("kdp_panic_dump"); + if (pkt.input) + kdp_panic("kdp_panic_dump: unexpected pending input packet"); - kdp_get_xnu_version((char *) &pkt.data[0]); + kdp_get_xnu_version((char *) &pkt.data[0]); - /* Panic log bit takes precedence over core dump bit */ - if ((panicstr != (char *) 0) && (kdp_flag & PANIC_LOG_DUMP)) - strncpy(coreprefix, "paniclog", sizeof(coreprefix)); - else - strncpy(coreprefix, "core", sizeof(coreprefix)); + /* Panic log bit takes precedence over core dump bit */ + if ((panicstr != (char *) 0) && (kdp_flag & PANIC_LOG_DUMP)) + strncpy(coreprefix, "paniclog", sizeof(coreprefix)); + else + strncpy(coreprefix, "core", sizeof(coreprefix)); - abstime = mach_absolute_time(); - pkt.data[10] = '\0'; - snprintf (corename, sizeof(corename), "%s-%s-%d.%d.%d.%d-%x", + abstime = mach_absolute_time(); + pkt.data[20] = '\0'; + snprintf (corename, sizeof(corename), "%s-%s-%d.%d.%d.%d-%x", coreprefix, &pkt.data[0], - (kdp_current_ip_address & 0xff000000) >> 24, - (kdp_current_ip_address & 0xff0000) >> 16, - (kdp_current_ip_address & 0xff00) >> 8, - (kdp_current_ip_address & 0xff), - (unsigned int) (abstime & 0xffffffff)); - - if (0 == inet_aton(panicd_ip_str, (struct in_addr *) &panic_server_ip)) { - printf("inet_aton() failed interpreting %s as a panic server IP\n", - panicd_ip_str); - } - else - printf("Attempting connection to panic server configured at IP %s\n", - panicd_ip_str); + (current_ip & 0xff000000) >> 24, + 
(current_ip & 0xff0000) >> 16, + (current_ip & 0xff00) >> 8, + (current_ip & 0xff), + (unsigned int) (abstime & 0xffffffff)); + + if (0 == inet_aton(panicd_ip_str, (struct in_addr *) &panic_server_ip)) { + printf("inet_aton() failed interpreting %s as a panic server IP\n", panicd_ip_str); + } + else + printf("Attempting connection to panic server configured at IP %s, port %d\n", panicd_ip_str, panicd_port); - if (router_specified) { - if (0 == inet_aton(router_ip_str, (struct in_addr *) &parsed_router_ip)){ - printf("inet_aton() failed interpreting %s as an IP\n", router_ip); + destination_mac = router_mac; + + if (kdp_arp_resolve(panic_server_ip, &temp_mac)) { + printf("Resolved %s's (or proxy's) link level address\n", panicd_ip_str); + destination_mac = temp_mac; } - else { - router_ip = parsed_router_ip; - printf("Routing through specified router IP %s (%d)\n", router_ip_str, router_ip); - /* We will eventually need to resolve the router's MAC ourselves, - * if one is specified,rather than being set through the BSD callback - * but the _router_ip option does not function currently - */ + else { + if (!flag_panic_dump_in_progress) goto panic_dump_exit; + if (router_specified) { + if (0 == inet_aton(router_ip_str, (struct in_addr *) &parsed_router_ip)) + printf("inet_aton() failed interpreting %s as an IP\n", router_ip_str); + else { + router_ip = parsed_router_ip; + if (kdp_arp_resolve(router_ip, &temp_mac)) { + destination_mac = temp_mac; + printf("Routing through specified router IP %s (%d)\n", router_ip_str, router_ip); + } + } + } } - } - printf("Routing via router MAC address: %02x:%02x:%02x:%02x:%02x:%02x\n", - router_mac.ether_addr_octet[0] & 0xff, - router_mac.ether_addr_octet[1] & 0xff, - router_mac.ether_addr_octet[2] & 0xff, - router_mac.ether_addr_octet[3] & 0xff, - router_mac.ether_addr_octet[4] & 0xff, - router_mac.ether_addr_octet[5] & 0xff); + if (!flag_panic_dump_in_progress) goto panic_dump_exit; - printf("Kernel map size is %llu\n", (unsigned 
long long) get_vmmap_size(kernel_map)); - printf ("Sending write request for %s\n", corename); + printf("Transmitting packets to link level address: %02x:%02x:%02x:%02x:%02x:%02x\n", + destination_mac.ether_addr_octet[0] & 0xff, + destination_mac.ether_addr_octet[1] & 0xff, + destination_mac.ether_addr_octet[2] & 0xff, + destination_mac.ether_addr_octet[3] & 0xff, + destination_mac.ether_addr_octet[4] & 0xff, + destination_mac.ether_addr_octet[5] & 0xff); - if ((panic_error = kdp_send_panic_pkt (KDP_WRQ, corename, 0 , NULL)) < 0) { - printf ("kdp_send_panic_pkt failed with error %d\n", panic_error); - goto panic_dump_exit; - } + printf("Kernel map size is %llu\n", (unsigned long long) get_vmmap_size(kernel_map)); + printf("Sending write request for %s\n", corename); - /* Just the panic log requested */ - if ((panicstr != (char *) 0) && (kdp_flag & PANIC_LOG_DUMP)) { - printf("Transmitting panic log, please wait: "); - kdp_send_panic_packets (KDP_DATA, corename, (debug_buf_ptr - debug_buf), (unsigned int) debug_buf); - kdp_send_panic_pkt (KDP_EOF, NULL, 0, ((void *) 0)); - printf("Please file a bug report on this panic, if possible.\n"); - goto panic_dump_exit; - } + if ((panic_error = kdp_send_crashdump_pkt(KDP_WRQ, corename, 0 , NULL)) < 0) { + printf ("kdp_send_crashdump_pkt failed with error %d\n", panic_error); + goto panic_dump_exit; + } + + /* Just the panic log requested */ + if ((panicstr != (char *) 0) && (kdp_flag & PANIC_LOG_DUMP)) { + printf("Transmitting panic log, please wait: "); + kdp_send_crashdump_data(KDP_DATA, corename, (debug_buf_ptr - debug_buf), debug_buf); + kdp_send_crashdump_pkt (KDP_EOF, NULL, 0, ((void *) 0)); + printf("Please file a bug report on this panic, if possible.\n"); + goto panic_dump_exit; + } - /* We want a core dump if we're here */ - kern_dump(); + /* We want a core dump if we're here */ + kern_dump(); panic_dump_exit: - not_in_kdp = 1; - flag_panic_dump_in_progress = 0; - panic_block = 0; - pkt.input = FALSE; - pkt.len = 
0; - kdp_reset(); - return; + abort_panic_transfer(); + pkt.input = FALSE; + pkt.len = 0; + kdp_reset(); + return; } void -abort_panic_transfer() +abort_panic_transfer(void) { - flag_panic_dump_in_progress = 0; - not_in_kdp = 1; - panic_block = 0; + flag_panic_dump_in_progress = FALSE; + not_in_kdp = 1; + panic_block = 0; } diff --git a/osfmk/kdp/ml/i386/kdp_machdep.c b/osfmk/kdp/ml/i386/kdp_machdep.c index d4863a6ce..09e93e2b4 100644 --- a/osfmk/kdp/ml/i386/kdp_machdep.c +++ b/osfmk/kdp/ml/i386/kdp_machdep.c @@ -20,6 +20,7 @@ * @APPLE_LICENSE_HEADER_END@ */ +#include #include #include #include @@ -27,6 +28,15 @@ #include #include #include +#include +#include +#include /* for PE_halt_restart */ +#include /* for halt_all_cpus */ + +#include +#include +#include +#include #define KDP_TEST_HARNESS 0 #if KDP_TEST_HARNESS @@ -35,7 +45,8 @@ #define dprintf(x) #endif -extern void kdreboot(void); +extern cpu_type_t cpuid_cputype(void); +extern cpu_subtype_t cpuid_cpusubtype(void); void print_saved_state(void *); void kdp_call(void); @@ -44,12 +55,14 @@ boolean_t kdp_call_kdb(void); void kdp_getstate(i386_thread_state_t *); void kdp_setstate(i386_thread_state_t *); void kdp_print_phys(int); -void kdp_i386_backtrace(void *, int); -void kdp_i386_trap( - unsigned int, - struct i386_saved_state *, - kern_return_t, - vm_offset_t); + +int +machine_trace_thread(thread_t thread, uint32_t tracepos, uint32_t tracebound, int nframes, boolean_t user_p); + +int +machine_trace_thread64(thread_t thread, uint32_t tracepos, uint32_t tracebound, int nframes, boolean_t user_p); + +extern unsigned kdp_vm_read(caddr_t src, caddr_t dst, unsigned len); void kdp_exception( @@ -110,13 +123,13 @@ kdp_exception_ack( void kdp_getstate( - i386_thread_state_t *state + x86_thread_state32_t *state ) { - static i386_thread_state_t null_state; - struct i386_saved_state *saved_state; + static x86_thread_state32_t null_state; + x86_saved_state32_t *saved_state; - saved_state = (struct i386_saved_state 
*)kdp.saved_state; + saved_state = (x86_saved_state32_t *)kdp.saved_state; *state = null_state; state->eax = saved_state->eax; @@ -147,12 +160,12 @@ kdp_getstate( void kdp_setstate( - i386_thread_state_t *state + x86_thread_state32_t *state ) { - struct i386_saved_state *saved_state; + x86_saved_state32_t *saved_state; - saved_state = (struct i386_saved_state *)kdp.saved_state; + saved_state = (x86_saved_state32_t *)kdp.saved_state; saved_state->eax = state->eax; saved_state->ebx = state->ebx; @@ -180,20 +193,21 @@ kdp_machine_read_regs( __unused int *size ) { - static i386_thread_fpstate_t null_fpstate; + static struct i386_float_state null_fpstate; switch (flavor) { - case i386_THREAD_STATE: + case OLD_i386_THREAD_STATE: + case x86_THREAD_STATE32: dprintf(("kdp_readregs THREAD_STATE\n")); - kdp_getstate((i386_thread_state_t *)data); - *size = sizeof (i386_thread_state_t); + kdp_getstate((x86_thread_state32_t *)data); + *size = sizeof (x86_thread_state32_t); return KDPERR_NO_ERROR; - case i386_THREAD_FPSTATE: + case x86_FLOAT_STATE32: dprintf(("kdp_readregs THREAD_FPSTATE\n")); - *(i386_thread_fpstate_t *)data = null_fpstate; - *size = sizeof (i386_thread_fpstate_t); + *(x86_float_state32_t *)data = null_fpstate; + *size = sizeof (x86_float_state32_t); return KDPERR_NO_ERROR; default: @@ -213,12 +227,13 @@ kdp_machine_write_regs( { switch (flavor) { - case i386_THREAD_STATE: + case OLD_i386_THREAD_STATE: + case x86_THREAD_STATE32: dprintf(("kdp_writeregs THREAD_STATE\n")); - kdp_setstate((i386_thread_state_t *)data); + kdp_setstate((x86_thread_state32_t *)data); return KDPERR_NO_ERROR; - case i386_THREAD_FPSTATE: + case x86_FLOAT_STATE32: dprintf(("kdp_writeregs THREAD_FPSTATE\n")); return KDPERR_NO_ERROR; @@ -246,9 +261,8 @@ kdp_machine_hostinfo( hostinfo->cpus_mask |= (1 << i); } - /* FIXME?? 
*/ - hostinfo->cpu_type = CPU_TYPE_I386; - hostinfo->cpu_subtype = CPU_SUBTYPE_486; + hostinfo->cpu_type = cpuid_cputype(); + hostinfo->cpu_subtype = cpuid_cpusubtype(); } void @@ -264,7 +278,12 @@ kdp_panic( void kdp_reboot(void) { - kdreboot(); + printf("Attempting system restart..."); + /* Call the platform specific restart*/ + if (PE_halt_restart) + (*PE_halt_restart)(kPERestartCPU); + /* If we do reach this, give up */ + halt_all_cpus(TRUE); } int @@ -293,14 +312,14 @@ kdp_us_spin(int usec) void print_saved_state(void *state) { - struct i386_saved_state *saved_state; + x86_saved_state32_t *saved_state; saved_state = state; kprintf("pc = 0x%x\n", saved_state->eip); - kprintf("cr3= 0x%x\n", saved_state->cr2); + kprintf("cr2= 0x%x\n", saved_state->cr2); kprintf("rp = TODO FIXME\n"); - kprintf("sp = 0x%x\n", saved_state->esp); + kprintf("sp = 0x%x\n", saved_state); } @@ -346,51 +365,24 @@ kdp_print_phys(int src) } - -#define MAX_FRAME_DELTA 65536 - -void -kdp_i386_backtrace(void *_frame, int nframes) -{ - cframe_t *frame = (cframe_t *)_frame; - int i; - - for (i=0; i VM_MAX_KERNEL_ADDRESS) { - goto invalid; - } - kprintf("frame 0x%x called by 0x%x ", - frame, frame->caller); - kprintf("args 0x%x 0x%x 0x%x 0x%x\n", - frame->args[0], frame->args[1], - frame->args[2], frame->args[3]); - if ((frame->prev < frame) || /* wrong direction */ - ((frame->prev - frame) > MAX_FRAME_DELTA)) { - goto invalid; - } - frame = frame->prev; - } - return; -invalid: - kprintf("invalid frame pointer 0x%x\n",frame); -} - -void +boolean_t kdp_i386_trap( - unsigned int trapno, - struct i386_saved_state *saved_state, + unsigned int trapno, + x86_saved_state32_t *saved_state, kern_return_t result, vm_offset_t va ) { unsigned int exception, subcode = 0, code; - mp_kdp_enter(); - - if (trapno != T_INT3 && trapno != T_DEBUG) + if (trapno != T_INT3 && trapno != T_DEBUG) { kprintf("unexpected kernel trap 0x%x eip 0x%x cr2 0x%x \n", - trapno, saved_state->eip, saved_state->esp); + trapno, 
saved_state->eip, saved_state->cr2); + if (!kdp.is_conn) + return FALSE; + } + + mp_kdp_enter(); switch (trapno) { @@ -455,11 +447,11 @@ kdp_i386_trap( break; } - kdp_i386_backtrace((void *) saved_state->ebp, 10); - kdp_raise_exception(exception, code, subcode, saved_state); mp_kdp_exit(); + + return TRUE; } boolean_t @@ -474,3 +466,84 @@ kdp_ml_get_breakinsn(void) { return 0xcc; } +extern pmap_t kdp_pmap; + +#define RETURN_OFFSET 4 +int +machine_trace_thread(thread_t thread, uint32_t tracepos, uint32_t tracebound, int nframes, boolean_t user_p) +{ + uint32_t *tracebuf = (uint32_t *)tracepos; + uint32_t fence = 0; + uint32_t stackptr = 0; + uint32_t stacklimit = 0xfc000000; + int framecount = 0; + uint32_t init_eip = 0; + uint32_t prevsp = 0; + uint32_t framesize = 2 * sizeof(vm_offset_t); + + if (user_p) { + x86_saved_state32_t *iss32; + + iss32 = USER_REGS32(thread); + + init_eip = iss32->eip; + stackptr = iss32->ebp; + + /* This bound isn't useful, but it doesn't hinder us*/ + stacklimit = 0xffffffff; + kdp_pmap = thread->task->map->pmap; + } + else { + /*Examine the i386_saved_state at the base of the kernel stack*/ + stackptr = STACK_IKS(thread->kernel_stack)->k_ebp; + init_eip = STACK_IKS(thread->kernel_stack)->k_eip; + } + + *tracebuf++ = init_eip; + + for (framecount = 0; framecount < nframes; framecount++) { + + if ((tracebound - ((uint32_t) tracebuf)) < (4 * framesize)) { + tracebuf--; + break; + } + + *tracebuf++ = stackptr; +/* Invalid frame, or hit fence */ + if (!stackptr || (stackptr == fence)) { + break; + } + /* Stack grows downward */ + if (stackptr < prevsp) { + break; + } + /* Unaligned frame */ + if (stackptr & 0x0000003) { + break; + } + if (stackptr > stacklimit) { + break; + } + + if (kdp_vm_read((caddr_t) (stackptr + RETURN_OFFSET), (caddr_t) tracebuf, sizeof(caddr_t)) != sizeof(caddr_t)) { + break; + } + tracebuf++; + + prevsp = stackptr; + if (kdp_vm_read((caddr_t) stackptr, (caddr_t) &stackptr, sizeof(caddr_t)) != sizeof(caddr_t)) { + 
*tracebuf++ = 0; + break; + } + } + + kdp_pmap = 0; + + return ((uint32_t) tracebuf - tracepos); +} + +/* This is a stub until the x86 64-bit model becomes clear */ +int +machine_trace_thread64(__unused thread_t thread, __unused uint32_t tracepos, __unused uint32_t tracebound, __unused int nframes, __unused boolean_t user_p) { + return 0; +} diff --git a/osfmk/kdp/ml/i386/kdp_vm.c b/osfmk/kdp/ml/i386/kdp_vm.c index 6ad1202f4..3d1aaa7e4 100644 --- a/osfmk/kdp/ml/i386/kdp_vm.c +++ b/osfmk/kdp/ml/i386/kdp_vm.c @@ -23,14 +23,78 @@ #include #include #include - + +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include +#include + unsigned kdp_vm_read( caddr_t, caddr_t, unsigned); unsigned kdp_vm_write( caddr_t, caddr_t, unsigned); -unsigned kdp_copy_kmem( caddr_t, caddr_t, unsigned); -int kern_dump(void); + +boolean_t kdp_trans_off = 0; +uint32_t kdp_src_high32 = 0; +extern pmap_paddr_t avail_start, avail_end; + +extern void bcopy_phys(addr64_t from, addr64_t to, int size); +static addr64_t kdp_vtophys(pmap_t pmap, addr64_t va); + +pmap_t kdp_pmap = 0; unsigned int not_in_kdp = 1; /* Cleared when we begin to access vm functions in kdp */ +extern vm_offset_t sectTEXTB, sectDATAB, sectLINKB, sectPRELINKB; +extern int sectSizeTEXT, sectSizeDATA, sectSizeLINK, sectSizePRELINK; + +int kern_dump(void); +int kdp_dump_trap(int type, x86_saved_state32_t *regs); + +typedef struct { + int flavor; /* the number for this flavor */ + mach_msg_type_number_t count; /* count of ints in this flavor */ +} mythread_state_flavor_t; + +static mythread_state_flavor_t thread_flavor_array [] = { + {x86_THREAD_STATE32, x86_THREAD_STATE32_COUNT} +}; + +static int kdp_mynum_flavors = 1; +static int MAX_TSTATE_FLAVORS = 1; + +typedef struct { + vm_offset_t header; + int hoffset; + mythread_state_flavor_t *flavors; + int tstate_size; +} tir_t; + +char command_buffer[512]; + +static addr64_t +kdp_vtophys( + pmap_t pmap, + addr64_t va) +{ + addr64_t pa; + 
ppnum_t pp; +/* Clear high 32 - pmap_find_phys() may panic() otherwise */ + va &= 0xFFFFFFFFULL; + pp = pmap_find_phys(pmap, va); + if(!pp) return 0; + + pa = ((addr64_t)pp << 12) | (va & 0x0000000000000FFFULL); + return(pa); +} + /* * */ @@ -39,7 +103,55 @@ unsigned kdp_vm_read( caddr_t dst, unsigned len) { - return kdp_copy_kmem(src, dst, len); + addr64_t cur_virt_src = (addr64_t)((unsigned int)src | (((uint64_t)kdp_src_high32) << 32)); + addr64_t cur_virt_dst = (addr64_t)((unsigned int)dst); + addr64_t cur_phys_dst, cur_phys_src; + unsigned resid = len; + unsigned cnt = 0; + pmap_t src_pmap = kernel_pmap; + +/* If a different pmap has been specified with kdp_pmap, use it to translate the + * source (cur_virt_src); otherwise, the source is translated using the + * kernel_pmap. + */ + if (kdp_pmap) + src_pmap = kdp_pmap; + + while (resid != 0) { +/* Translate, unless kdp_trans_off is set */ + if (!kdp_trans_off) { + if (!(cur_phys_src = kdp_vtophys(src_pmap, + cur_virt_src))) + goto exit; + } + else + cur_phys_src = cur_virt_src; + +/* Always translate the destination buffer using the kernel_pmap */ + if(!(cur_phys_dst = kdp_vtophys(kernel_pmap, cur_virt_dst))) + goto exit; + + /* Validate physical page numbers when performing a crashdump */ + if (not_in_kdp == 0) + if (!pmap_valid_page(i386_btop(cur_phys_dst)) || !pmap_valid_page(i386_btop(cur_phys_src))) + goto exit; + +/* Get length left on page */ + cnt = PAGE_SIZE - (cur_phys_src & PAGE_MASK); + if (cnt > (PAGE_SIZE - (cur_phys_dst & PAGE_MASK))) + cnt = PAGE_SIZE - (cur_phys_dst & PAGE_MASK); + if (cnt > resid) + cnt = resid; + +/* Do a physical copy */ + bcopy_phys(cur_phys_src, cur_phys_dst, cnt); + + cur_virt_src += cnt; + cur_virt_dst += cnt; + resid -= cnt; + } +exit: + return (len - resid); } /* @@ -50,11 +162,375 @@ unsigned kdp_vm_write( caddr_t dst, unsigned len) { - return kdp_copy_kmem(src, dst, len); + addr64_t cur_virt_src, cur_virt_dst; + addr64_t cur_phys_src, cur_phys_dst; + unsigned resid, 
cnt, cnt_src, cnt_dst; + +#ifdef KDP_VM_WRITE_DEBUG + printf("kdp_vm_write: src %x dst %x len %x - %08X %08X\n", src, dst, len, ((unsigned long *)src)[0], ((unsigned long *)src)[1]); +#endif + + cur_virt_src = (addr64_t)((unsigned int)src); + cur_virt_dst = (addr64_t)((unsigned int)dst); + + resid = len; + + while (resid != 0) { + if ((cur_phys_dst = kdp_vtophys(kernel_pmap, cur_virt_dst)) == 0) + goto exit; + + if ((cur_phys_src = kdp_vtophys(kernel_pmap, cur_virt_src)) == 0) + goto exit; + + cnt_src = ((cur_phys_src + PAGE_SIZE) & (PAGE_MASK)) - cur_phys_src; + cnt_dst = ((cur_phys_dst + PAGE_SIZE) & (PAGE_MASK)) - cur_phys_dst; + + if (cnt_src > cnt_dst) + cnt = cnt_dst; + else + cnt = cnt_src; + if (cnt > resid) + cnt = resid; + + bcopy_phys(cur_phys_src, cur_phys_dst, cnt); /* Copy stuff over */ + + cur_virt_src +=cnt; + cur_virt_dst +=cnt; + resid -= cnt; + } +exit: + return (len - resid); } -/* A stub until i386 support is added for remote kernel core dumps */ -int kern_dump(void) +static void +kern_collectth_state(thread_t thread, tir_t *t) { - return 0; + vm_offset_t header; + int hoffset, i ; + mythread_state_flavor_t *flavors; + struct thread_command *tc; + /* + * Fill in thread command structure. + */ + header = t->header; + hoffset = t->hoffset; + flavors = t->flavors; + + tc = (struct thread_command *) (header + hoffset); + tc->cmd = LC_THREAD; + tc->cmdsize = sizeof(struct thread_command) + t->tstate_size; + hoffset += sizeof(struct thread_command); + /* + * Follow with a struct thread_state_flavor and + * the appropriate thread state struct for each + * thread state flavor. + */ + for (i = 0; i < kdp_mynum_flavors; i++) { + *(mythread_state_flavor_t *)(header+hoffset) = + flavors[i]; + hoffset += sizeof(mythread_state_flavor_t); + /* Locate and obtain the non-volatile register context + * for this kernel thread. 
This should ideally be + * encapsulated in machine_thread_get_kern_state() + * but that routine appears to have been co-opted + * by CHUD to obtain pre-interrupt state. + */ + if (flavors[i].flavor == x86_THREAD_STATE32) { + x86_thread_state32_t *tstate = (x86_thread_state32_t *) (header + hoffset); + vm_offset_t kstack; + bzero(tstate, x86_THREAD_STATE32_COUNT * sizeof(int)); + if ((kstack = thread->kernel_stack) != 0){ + struct x86_kernel_state32 *iks = STACK_IKS(kstack); + tstate->ebx = iks->k_ebx; + tstate->esp = iks->k_esp; + tstate->ebp = iks->k_ebp; + tstate->edi = iks->k_edi; + tstate->esi = iks->k_esi; + tstate->eip = iks->k_eip; + } + } + else if (machine_thread_get_kern_state(thread, + flavors[i].flavor, (thread_state_t) (header+hoffset), + &flavors[i].count) != KERN_SUCCESS) + printf ("Failure in machine_thread_get_kern_state()\n"); + hoffset += flavors[i].count*sizeof(int); + } + + t->hoffset = hoffset; +} + +/* Intended to be called from the kernel trap handler if an unrecoverable fault + * occurs during a crashdump (which shouldn't happen since we validate mappings + * and so on). This should be reworked to attempt some form of recovery. 
+ */ +int +kdp_dump_trap( + int type, + __unused x86_saved_state32_t *saved_state) +{ + printf ("An unexpected trap (type %d) occurred during the system dump, terminating.\n", type); + kdp_send_crashdump_pkt (KDP_EOF, NULL, 0, ((void *) 0)); + abort_panic_transfer(); + kdp_flag &= ~KDP_PANIC_DUMP_ENABLED; + kdp_flag &= ~PANIC_CORE_ON_NMI; + kdp_flag &= ~PANIC_LOG_DUMP; + + kdp_reset(); + + kdp_raise_exception(EXC_BAD_ACCESS, 0, 0, kdp.saved_state); + return( 0 ); +} + +int +kern_dump(void) +{ + vm_map_t map; + unsigned int thread_count, segment_count; + unsigned int command_size = 0, header_size = 0, tstate_size = 0; + unsigned int hoffset = 0, foffset = 0, nfoffset = 0, vmoffset = 0; + unsigned int max_header_size = 0; + vm_offset_t header; + struct mach_header *mh; + struct segment_command *sc; + vm_size_t size; + vm_prot_t prot = 0; + vm_prot_t maxprot = 0; + vm_inherit_t inherit = 0; + mythread_state_flavor_t flavors[MAX_TSTATE_FLAVORS]; + vm_size_t nflavors; + vm_size_t i; + uint32_t nesting_depth = 0; + kern_return_t kret = 0; + struct vm_region_submap_info_64 vbr; + mach_msg_type_number_t vbrcount = 0; + tir_t tir1; + + int error = 0; + int panic_error = 0; + unsigned int txstart = 0; + unsigned int mach_section_count = 4; + unsigned int num_sects_txed = 0; + + map = kernel_map; + + not_in_kdp = 0; /* Signal vm functions not to acquire locks */ + + thread_count = 1; + segment_count = get_vmmap_entries(map); + + printf("Kernel map has %d entries\n", segment_count); + + nflavors = kdp_mynum_flavors; + bcopy((char *)thread_flavor_array,(char *) flavors,sizeof(thread_flavor_array)); + + for (i = 0; i < nflavors; i++) + tstate_size += sizeof(mythread_state_flavor_t) + + (flavors[i].count * sizeof(int)); + + command_size = (segment_count + mach_section_count) * + sizeof(struct segment_command) + + thread_count * sizeof(struct thread_command) + + tstate_size * thread_count; + + header_size = command_size + sizeof(struct mach_header); + header = (vm_offset_t) 
command_buffer; + + /* + * Set up Mach-O header for currently executing 32 bit kernel. + */ + printf ("Generated Mach-O header size was %d\n", header_size); + + mh = (struct mach_header *) header; + mh->magic = MH_MAGIC; + mh->cputype = cpu_type(); + mh->cpusubtype = cpu_subtype(); + mh->filetype = MH_CORE; + mh->ncmds = segment_count + thread_count + mach_section_count; + mh->sizeofcmds = command_size; + mh->flags = 0; + + hoffset = sizeof(struct mach_header); /* offset into header */ + foffset = round_page_32(header_size); /* offset into file */ + /* Padding */ + if ((foffset - header_size) < (4*sizeof(struct segment_command))) { + foffset += ((4*sizeof(struct segment_command)) - (foffset-header_size)); + } + + max_header_size = foffset; + + vmoffset = VM_MIN_ADDRESS; /* offset into VM */ + + /* Transmit the Mach-O MH_CORE header, and seek forward past the + * area reserved for the segment and thread commands + * to begin data transmission + */ + + if ((panic_error = kdp_send_crashdump_pkt (KDP_SEEK, NULL, sizeof(nfoffset) , &nfoffset)) < 0) { + printf ("kdp_send_crashdump_pkt failed with error %d\n", panic_error); + error = panic_error; + goto out; + } + + if ((panic_error = kdp_send_crashdump_data (KDP_DATA, NULL, sizeof(struct mach_header), (caddr_t) mh) < 0)) { + printf ("kdp_send_crashdump_data failed with error %d\n", panic_error); + error = panic_error; + goto out; + } + + if ((panic_error = kdp_send_crashdump_pkt (KDP_SEEK, NULL, sizeof(foffset) , &foffset) < 0)) { + printf ("kdp_send_crashdump_pkt failed with error %d\n", panic_error); + error = panic_error; + goto out; + } + printf ("Transmitting kernel state, please wait: "); + + while ((segment_count > 0) || (kret == KERN_SUCCESS)){ + /* Check if we've transmitted all the kernel sections */ + if (num_sects_txed == mach_section_count) { + + while (1) { + + /* + * Get region information for next region. 
+ */ + + vbrcount = VM_REGION_SUBMAP_INFO_COUNT_64; + if((kret = vm_region_recurse_64(map, + &vmoffset, &size, &nesting_depth, + (vm_region_recurse_info_t)&vbr, + &vbrcount)) != KERN_SUCCESS) { + break; + } + + if(vbr.is_submap) { + nesting_depth++; + continue; + } else { + break; + } + } + + if(kret != KERN_SUCCESS) + break; + + prot = vbr.protection; + maxprot = vbr.max_protection; + inherit = vbr.inheritance; + } + else + { + switch (num_sects_txed) { + case 0: + /* Transmit the kernel text section */ + vmoffset = sectTEXTB; + size = sectSizeTEXT; + break; + case 1: + vmoffset = sectDATAB; + size = sectSizeDATA; + break; + case 2: + vmoffset = sectPRELINKB; + size = sectSizePRELINK; + break; + case 3: + vmoffset = sectLINKB; + size = sectSizeLINK; + break; + } + num_sects_txed++; + } + /* + * Fill in segment command structure. + */ + + if (hoffset > max_header_size) + break; + sc = (struct segment_command *) (header); + sc->cmd = LC_SEGMENT; + sc->cmdsize = sizeof(struct segment_command); + sc->segname[0] = 0; + sc->vmaddr = vmoffset; + sc->vmsize = size; + sc->fileoff = foffset; + sc->filesize = size; + sc->maxprot = maxprot; + sc->initprot = prot; + sc->nsects = 0; + + if ((panic_error = kdp_send_crashdump_pkt (KDP_SEEK, NULL, sizeof(hoffset) , &hoffset)) < 0) { + printf ("kdp_send_crashdump_pkt failed with error %d\n", panic_error); + error = panic_error; + goto out; + } + + if ((panic_error = kdp_send_crashdump_data (KDP_DATA, NULL, sizeof(struct segment_command) , (caddr_t) sc)) < 0) { + printf ("kdp_send_crashdump_data failed with error %d\n", panic_error); + error = panic_error; + goto out; + } + + /* Do not transmit memory tagged VM_MEMORY_IOKIT - instead, + * seek past that region on the server - this creates a + * hole in the file. 
+ */ + + if ((vbr.user_tag != VM_MEMORY_IOKIT)) { + + if ((panic_error = kdp_send_crashdump_pkt (KDP_SEEK, NULL, sizeof(foffset) , &foffset)) < 0) { + printf ("kdp_send_crashdump_pkt failed with error %d\n", panic_error); + error = panic_error; + goto out; + } + + txstart = vmoffset; + + if ((panic_error = kdp_send_crashdump_data (KDP_DATA, NULL, size, (caddr_t) txstart)) < 0) { + printf ("kdp_send_crashdump_data failed with error %d\n", panic_error); + error = panic_error; + goto out; + } + } + + hoffset += sizeof(struct segment_command); + foffset += size; + vmoffset += size; + segment_count--; + } + tir1.header = header; + tir1.hoffset = 0; + tir1.flavors = flavors; + tir1.tstate_size = tstate_size; + + /* Now send out the LC_THREAD load command, with the thread information + * for the current activation. + * Note that the corefile can contain LC_SEGMENT commands with file + * offsets that point past the edge of the corefile, in the event that + * the last N VM regions were all I/O mapped or otherwise + * non-transferable memory, not followed by a normal VM region; + * i.e. there will be no hole that reaches to the end of the core file. 
+ */ + kern_collectth_state (current_thread(), &tir1); + + if ((panic_error = kdp_send_crashdump_pkt (KDP_SEEK, NULL, sizeof(hoffset) , &hoffset)) < 0) { + printf ("kdp_send_crashdump_pkt failed with error %d\n", panic_error); + error = panic_error; + goto out; + } + + if ((panic_error = kdp_send_crashdump_data (KDP_DATA, NULL, tir1.hoffset , (caddr_t) header)) < 0) { + printf ("kdp_send_crashdump_data failed with error %d\n", panic_error); + error = panic_error; + goto out; + } + + /* last packet */ + if ((panic_error = kdp_send_crashdump_pkt (KDP_EOF, NULL, 0, ((void *) 0))) < 0) + { + printf ("kdp_send_crashdump_pkt failed with error %d\n", panic_error); + error = panic_error; + goto out; + } +out: + return (error); } diff --git a/osfmk/kdp/ml/ppc/kdp_machdep.c b/osfmk/kdp/ml/ppc/kdp_machdep.c index 8d90afd7d..f2d2df2e5 100644 --- a/osfmk/kdp/ml/ppc/kdp_machdep.c +++ b/osfmk/kdp/ml/ppc/kdp_machdep.c @@ -29,6 +29,12 @@ #include #include + +#include +#include +#include +#include + #define KDP_TEST_HARNESS 0 #if KDP_TEST_HARNESS #define dprintf(x) kprintf x @@ -42,6 +48,21 @@ void kdp_trap( unsigned int, struct savearea *saved_state); int kdp_getc(void); boolean_t kdp_call_kdb(void); +extern pmap_t kdp_pmap; +extern uint32_t kdp_src_high32; + + +extern unsigned kdp_vm_read(caddr_t src, caddr_t dst, unsigned len); + +int +machine_trace_thread(thread_t thread, uint32_t tracepos, uint32_t tracebound, int nframes, boolean_t user_p); + +int +machine_trace_thread64(thread_t thread, uint32_t tracepos, uint32_t tracebound, int nframes, boolean_t user_p); + +unsigned +machine_read64(addr64_t srcaddr, caddr_t dstaddr, uint32_t len); + void kdp_exception( unsigned char *pkt, @@ -628,3 +649,151 @@ unsigned int kdp_ml_get_breakinsn(void) { return 0x7fe00008; } +#define LR_OFFSET 8 +#define LR_OFFSET64 16 + +int +machine_trace_thread(thread_t thread, uint32_t tracepos, uint32_t tracebound, int nframes, boolean_t user_p) +{ + uint32_t *tracebuf = (uint32_t *)tracepos; + uint32_t 
fence = 0; + uint32_t stackptr = 0; + uint32_t stacklimit = 0xb0000000; + int framecount = 0; + uint32_t init_srr0 = 0; + uint32_t prevsp = 0; + uint32_t framesize = 2 * sizeof(vm_offset_t); + + if (user_p) { + /* Examine the user savearea */ + init_srr0 = thread->machine.upcb->save_srr0; + stackptr = thread->machine.upcb->save_r1; + /* This bound isn't useful, but it doesn't hinder us */ + stacklimit = 0xffffffff; + kdp_pmap = thread->task->map->pmap; + } + else { + stackptr = thread->machine.pcb->save_r1; + init_srr0 = thread->machine.pcb->save_srr0; + } + /* Fill in the "current" program counter */ + *tracebuf++ = init_srr0; + + for (framecount = 0; framecount < nframes; framecount++) { +/* Bounds check */ + if ((tracebound - ((uint32_t) tracebuf)) < (4 * framesize)) { + tracebuf--; + break; + } + + *tracebuf++ = stackptr; +/* Invalid frame, or hit fence */ + if (!stackptr || (stackptr == fence)) { + break; + } +/* Stack grows downward */ + if (stackptr < prevsp) { + break; + } +/* Unaligned frame */ + if (stackptr & 0x000000F) { + break; + } + if (stackptr > stacklimit) { + break; + } +/* Assume there's a saved link register, and read it */ + if (kdp_vm_read((caddr_t) (stackptr + LR_OFFSET), (caddr_t) tracebuf, sizeof(caddr_t)) != sizeof(caddr_t)) { + break; + } + + tracebuf++; + prevsp = stackptr; +/* Next frame */ + if (kdp_vm_read((caddr_t) stackptr, (caddr_t) &stackptr, sizeof(caddr_t)) != sizeof(caddr_t)) { + *tracebuf++ = 0; + break; + } + } +/* Reset the target pmap */ + kdp_pmap = 0; + return ((uint32_t) tracebuf - tracepos); +} + +/* Routine to encapsulate the 64-bit address read hack*/ +unsigned +machine_read64(addr64_t srcaddr, caddr_t dstaddr, uint32_t len) +{ + uint32_t kdp_vm_read_low32; + unsigned retval; + + kdp_src_high32 = srcaddr >> 32; + kdp_vm_read_low32 = srcaddr & 0x00000000FFFFFFFFUL; + retval = kdp_vm_read((caddr_t)kdp_vm_read_low32, dstaddr, len); + kdp_src_high32 = 0; + return retval; +} + +int +machine_trace_thread64(thread_t thread, 
uint32_t tracepos, uint32_t tracebound, int nframes, boolean_t user_p) +{ + uint64_t *tracebuf = (uint64_t *)tracepos; + uint32_t fence = 0; + addr64_t stackptr = 0; + uint64_t stacklimit = 0xb0000000; + int framecount = 0; + addr64_t init_srr0 = 0; + addr64_t prevsp = 0; + unsigned framesize = 2 * sizeof(addr64_t); + + if (user_p) { + init_srr0 = thread->machine.upcb->save_srr0; + stackptr = thread->machine.upcb->save_r1; + stacklimit = 0xffffffffffffffffULL; + kdp_pmap = thread->task->map->pmap; + } + else { + stackptr = thread->machine.pcb->save_r1; + init_srr0 = thread->machine.pcb->save_srr0; + } + + *tracebuf++ = init_srr0; + + for (framecount = 0; framecount < nframes; framecount++) { + + if ((tracebound - ((uint32_t) tracebuf)) < (4 * framesize)) { + tracebuf--; + break; + } + + *tracebuf++ = stackptr; + + if (!stackptr || (stackptr == fence)){ + break; + } + if (stackptr < prevsp) { + break; + } + if (stackptr & 0x000000F) { + break; + } + if (stackptr > stacklimit) { + break; + } + + if (machine_read64(stackptr+LR_OFFSET64, (caddr_t) tracebuf, sizeof(addr64_t)) != sizeof(addr64_t)) { + break; + } + tracebuf++; + + prevsp = stackptr; + if (machine_read64(stackptr, (caddr_t) &stackptr, sizeof(addr64_t)) != sizeof(addr64_t)) { + *tracebuf++ = 0; + break; + } + } + + kdp_pmap = 0; + + return ((uint32_t) tracebuf - tracepos); +} diff --git a/osfmk/kdp/ml/ppc/kdp_vm.c b/osfmk/kdp/ml/ppc/kdp_vm.c index c54cc73a2..12f9c9fd1 100644 --- a/osfmk/kdp/ml/ppc/kdp_vm.c +++ b/osfmk/kdp/ml/ppc/kdp_vm.c @@ -51,6 +51,7 @@ pmap_t kdp_pmap=0; boolean_t kdp_trans_off=0; boolean_t kdp_read_io =0; +uint32_t kdp_src_high32 = 0; unsigned kdp_vm_read( caddr_t, caddr_t, unsigned); unsigned kdp_vm_write( caddr_t, caddr_t, unsigned); @@ -60,42 +61,18 @@ extern int sectSizeTEXT, sectSizeDATA, sectSizeLINK, sectSizePRELINK; /* XXX prototypes which should be in a commmon header file */ addr64_t kdp_vtophys(pmap_t pmap, addr64_t va); -int kern_dump(void); -int kdp_dump_trap(int type, 
struct savearea *regs); -/* - * XXX the following prototype doesn't match the declaration because the - * XXX actual declaration is wrong. - */ -extern int kdp_send_panic_packets(unsigned int request, char *corename, - unsigned int length, caddr_t txstart); - - - +int kern_dump(void); +int kdp_dump_trap(int type, struct savearea *regs); typedef struct { int flavor; /* the number for this flavor */ int count; /* count of ints in this flavor */ } mythread_state_flavor_t; -/* These will need to be uncommented and completed - *if we support other architectures - */ - -/* -#if defined (__ppc__) -*/ static mythread_state_flavor_t thread_flavor_array[] = { {PPC_THREAD_STATE , PPC_THREAD_STATE_COUNT}, }; -/* -#elif defined (__i386__) -mythread_state_flavor_t thread_flavor_array [] = { - {i386_THREAD_STATE, i386_THREAD_STATE_COUNT}, -}; -#else -#error architecture not supported -#endif -*/ + static int kdp_mynum_flavors = 1; static int MAX_TSTATE_FLAVORS = 1; @@ -110,8 +87,6 @@ unsigned int not_in_kdp = 1; /* Cleared when we begin to access vm functions in char command_buffer[512]; -// XXX static struct vm_object test_object; - /* * */ @@ -150,13 +125,11 @@ unsigned kdp_vm_read( kprintf("kdp_vm_read1: src %x dst %x len %x - %08X %08X\n", src, dst, len, ((unsigned long *)src)[0], ((unsigned long *)src)[1]); #endif - cur_virt_src = (addr64_t)((unsigned int)src); + cur_virt_src = (addr64_t)((unsigned int)src | (((uint64_t)kdp_src_high32) << 32)); cur_virt_dst = (addr64_t)((unsigned int)dst); if (kdp_trans_off) { - - - resid = len; /* Get the length to copy */ + resid = len; /* Get the length to copy */ while (resid != 0) { @@ -317,7 +290,7 @@ kdp_dump_trap( __unused struct savearea *regs) { printf ("An unexpected trap (type %d) occurred during the kernel dump, terminating.\n", type); - kdp_send_panic_pkt (KDP_EOF, NULL, 0, ((void *) 0)); + kdp_send_crashdump_pkt (KDP_EOF, NULL, 0, ((void *) 0)); abort_panic_transfer(); kdp_flag &= ~KDP_PANIC_DUMP_ENABLED; kdp_flag &= 
~PANIC_CORE_ON_NMI; @@ -417,25 +390,25 @@ kern_dump(void) * to begin data transmission */ - if ((panic_error = kdp_send_panic_pkt (KDP_SEEK, NULL, sizeof(nfoffset) , &nfoffset)) < 0) { - printf ("kdp_send_panic_pkt failed with error %d\n", panic_error); + if ((panic_error = kdp_send_crashdump_pkt (KDP_SEEK, NULL, sizeof(nfoffset) , &nfoffset)) < 0) { + printf ("kdp_send_crashdump_pkt failed with error %d\n", panic_error); return -1; } - if ((panic_error = kdp_send_panic_packets (KDP_DATA, NULL, sizeof(struct mach_header), (caddr_t) mh) < 0)) { - printf ("kdp_send_panic_packets failed with error %d\n", panic_error); + if ((panic_error = kdp_send_crashdump_data (KDP_DATA, NULL, sizeof(struct mach_header), (caddr_t) mh) < 0)) { + printf ("kdp_send_crashdump_data failed with error %d\n", panic_error); return -1 ; } - if ((panic_error = kdp_send_panic_pkt (KDP_SEEK, NULL, sizeof(foffset) , &foffset) < 0)) { - printf ("kdp_send_panic_pkt failed with error %d\n", panic_error); + if ((panic_error = kdp_send_crashdump_pkt (KDP_SEEK, NULL, sizeof(foffset) , &foffset) < 0)) { + printf ("kdp_send_crashdump_pkt failed with error %d\n", panic_error); return (-1); } printf ("Transmitting kernel state, please wait: "); while ((segment_count > 0) || (kret == KERN_SUCCESS)){ /* Check if we've transmitted all the kernel sections */ - if (num_sects_txed == mach_section_count-1) { + if (num_sects_txed == mach_section_count) { while (1) { @@ -519,13 +492,13 @@ kern_dump(void) sc->initprot = prot; sc->nsects = 0; - if ((panic_error = kdp_send_panic_pkt (KDP_SEEK, NULL, sizeof(hoffset) , &hoffset)) < 0) { - printf ("kdp_send_panic_pkt failed with error %d\n", panic_error); + if ((panic_error = kdp_send_crashdump_pkt (KDP_SEEK, NULL, sizeof(hoffset) , &hoffset)) < 0) { + printf ("kdp_send_crashdump_pkt failed with error %d\n", panic_error); return -1; } - if ((panic_error = kdp_send_panic_packets (KDP_DATA, NULL, sizeof(struct segment_command) , (caddr_t) sc)) < 0) { - printf 
("kdp_send_panic_packets failed with error %d\n", panic_error); + if ((panic_error = kdp_send_crashdump_data (KDP_DATA, NULL, sizeof(struct segment_command) , (caddr_t) sc)) < 0) { + printf ("kdp_send_crashdump_data failed with error %d\n", panic_error); return -1 ; } @@ -535,15 +508,15 @@ kern_dump(void) if ((vbr.user_tag != VM_MEMORY_IOKIT)) { - if ((panic_error = kdp_send_panic_pkt (KDP_SEEK, NULL, sizeof(foffset) , &foffset)) < 0) { - printf ("kdp_send_panic_pkt failed with error %d\n", panic_error); + if ((panic_error = kdp_send_crashdump_pkt (KDP_SEEK, NULL, sizeof(foffset) , &foffset)) < 0) { + printf ("kdp_send_crashdump_pkt failed with error %d\n", panic_error); return (-1); } txstart = vmoffset; - if ((panic_error = kdp_send_panic_packets (KDP_DATA, NULL, size, (caddr_t) txstart)) < 0) { - printf ("kdp_send_panic_packets failed with error %d\n", panic_error); + if ((panic_error = kdp_send_crashdump_data (KDP_DATA, NULL, size, (caddr_t) txstart)) < 0) { + printf ("kdp_send_crashdump_data failed with error %d\n", panic_error); return -1 ; } } @@ -568,20 +541,20 @@ kern_dump(void) */ kern_collectth_state (current_thread(), &tir1); - if ((panic_error = kdp_send_panic_pkt (KDP_SEEK, NULL, sizeof(hoffset) , &hoffset)) < 0) { - printf ("kdp_send_panic_pkt failed with error %d\n", panic_error); + if ((panic_error = kdp_send_crashdump_pkt(KDP_SEEK, NULL, sizeof(hoffset) , &hoffset)) < 0) { + printf ("kdp_send_crashdump_pkt failed with error %d\n", panic_error); return -1; } - if ((panic_error = kdp_send_panic_packets (KDP_DATA, NULL, tir1.hoffset , (caddr_t) header)) < 0) { - printf ("kdp_send_panic_packets failed with error %d\n", panic_error); + if ((panic_error = kdp_send_crashdump_data(KDP_DATA, NULL, tir1.hoffset , (caddr_t) header)) < 0) { + printf ("kdp_send_crashdump_data failed with error %d\n", panic_error); return -1 ; } /* last packet */ - if ((panic_error = kdp_send_panic_pkt (KDP_EOF, NULL, 0, ((void *) 0))) < 0) + if ((panic_error = 
kdp_send_crashdump_pkt(KDP_EOF, NULL, 0, ((void *) 0))) < 0) { - printf ("kdp_send_panic_pkt failed with error %d\n", panic_error); + printf ("kdp_send_crashdump_pkt failed with error %d\n", panic_error); return (-1) ; } diff --git a/osfmk/kern/Makefile b/osfmk/kern/Makefile index ab810f2c2..383bac18c 100644 --- a/osfmk/kern/Makefile +++ b/osfmk/kern/Makefile @@ -15,6 +15,7 @@ EXPORT_ONLY_FILES = \ cpu_number.h \ cpu_data.h \ debug.h \ + etimer.h \ ipc_mig.h \ kalloc.h \ kern_types.h \ @@ -23,6 +24,8 @@ EXPORT_ONLY_FILES = \ host.h \ mach_param.h \ macro_help.h \ + pms.h \ + page_decrypt.h \ processor.h \ queue.h \ sched_prim.h \ diff --git a/osfmk/kern/ast.c b/osfmk/kern/ast.c index ed95755ec..4fff10303 100644 --- a/osfmk/kern/ast.c +++ b/osfmk/kern/ast.c @@ -73,10 +73,7 @@ #include #include #include - -#ifdef __ppc__ -#include // for CHUD AST hook -#endif +#include // for CHUD AST hook void ast_init(void) @@ -96,12 +93,11 @@ ast_taken( ast_t *myast = ast_pending(); thread_t thread = current_thread(); -#ifdef __ppc__ /* * CHUD hook - all threads including idle processor threads */ if(perfASTHook) { - if(*myast & AST_PPC_CHUD_ALL) { + if(*myast & AST_CHUD_ALL) { perfASTHook(0, NULL, 0, 0); if(*myast == AST_NONE) { @@ -109,9 +105,8 @@ ast_taken( } } } else { - *myast &= ~AST_PPC_CHUD_ALL; + *myast &= ~AST_CHUD_ALL; } -#endif reasons &= *myast; *myast &= ~reasons; diff --git a/osfmk/kern/ast.h b/osfmk/kern/ast.h index 5a6de8d90..e6f951ae4 100644 --- a/osfmk/kern/ast.h +++ b/osfmk/kern/ast.h @@ -107,6 +107,11 @@ typedef uint32_t ast_t; #endif /* MACHINE_AST */ +#define AST_CHUD_URGENT 0x800 +#define AST_CHUD 0x400 + +#define AST_CHUD_ALL (AST_CHUD_URGENT|AST_CHUD) + /* Initialize module */ extern void ast_init(void); diff --git a/osfmk/kern/bsd_kern.c b/osfmk/kern/bsd_kern.c index 57c4559d2..a9a805928 100644 --- a/osfmk/kern/bsd_kern.c +++ b/osfmk/kern/bsd_kern.c @@ -31,6 +31,7 @@ #include #include #include +#include #include #include /* last */ @@ -260,6 +261,9 @@ 
swap_task_map(task_t task,vm_map_t map) old_map = task->map; thread->map = task->map = map; task_unlock(task); + + inval_copy_windows(thread); + return old_map; } @@ -517,3 +521,159 @@ astbsd_on(void) ast_on_fast(AST_BSD); (void)ml_set_interrupts_enabled(reenable); } + + +#include + +void +fill_taskprocinfo(task_t task, struct proc_taskinfo_internal * ptinfo) +{ + vm_map_t map; + task_absolutetime_info_data_t tinfo; + thread_t thread; + int numrunning = 0; + + map = (task == kernel_task)? kernel_map: task->map; + + ptinfo->pti_virtual_size = map->size; + ptinfo->pti_resident_size = (mach_vm_size_t)(pmap_resident_count(map->pmap) + * PAGE_SIZE); + + task_lock(task); + + ptinfo->pti_policy = ((task != kernel_task)? + POLICY_TIMESHARE: POLICY_RR); + + tinfo.threads_user = tinfo.threads_system = 0; + tinfo.total_user = task->total_user_time; + tinfo.total_system = task->total_system_time; + + queue_iterate(&task->threads, thread, thread_t, task_threads) { + uint64_t tval; + + if ((thread->state & TH_RUN) == TH_RUN) + numrunning++; + tval = timer_grab(&thread->user_timer); + tinfo.threads_user += tval; + tinfo.total_user += tval; + + tval = timer_grab(&thread->system_timer); + tinfo.threads_system += tval; + tinfo.total_system += tval; + } + + ptinfo->pti_total_system = tinfo.total_system; + ptinfo->pti_total_user = tinfo.total_user; + ptinfo->pti_threads_system = tinfo.threads_system; + ptinfo->pti_threads_user = tinfo.threads_user; + + ptinfo->pti_faults = task->faults; + ptinfo->pti_pageins = task->pageins; + ptinfo->pti_cow_faults = task->cow_faults; + ptinfo->pti_messages_sent = task->messages_sent; + ptinfo->pti_messages_received = task->messages_received; + ptinfo->pti_syscalls_mach = task->syscalls_mach; + ptinfo->pti_syscalls_unix = task->syscalls_unix; + ptinfo->pti_csw = task->csw; + ptinfo->pti_threadnum = task->thread_count; + ptinfo->pti_numrunning = numrunning; + ptinfo->pti_priority = task->priority; + + task_unlock(task); +} + +int 
+fill_taskthreadinfo(task_t task, uint64_t thaddr, struct proc_threadinfo_internal * ptinfo) +{ + thread_t thact; + int err=0, count; + thread_basic_info_data_t basic_info; + kern_return_t kret; + + task_lock(task); + + for (thact = (thread_t)queue_first(&task->threads); + !queue_end(&task->threads, (queue_entry_t)thact); ) { +#if defined(__ppc__) + if (thact->machine.cthread_self == thaddr) +#elif defined (__i386__) + if (thact->machine.pcb->cthread_self == thaddr) +#else +#error architecture not supported +#endif + { + + count = THREAD_BASIC_INFO_COUNT; + if ((kret = thread_info_internal(thact, THREAD_BASIC_INFO, &basic_info, &count)) != KERN_SUCCESS) { + err = 1; + goto out; + } +#if 0 + ptinfo->pth_user_time = timer_grab(&basic_info.user_time); + ptinfo->pth_system_time = timer_grab(&basic_info.system_time); +#else + ptinfo->pth_user_time = ((basic_info.user_time.seconds * NSEC_PER_SEC) + (basic_info.user_time.microseconds * NSEC_PER_USEC)); + ptinfo->pth_system_time = ((basic_info.system_time.seconds * NSEC_PER_SEC) + (basic_info.system_time.microseconds * NSEC_PER_USEC)); + +#endif + ptinfo->pth_cpu_usage = basic_info.cpu_usage; + ptinfo->pth_policy = basic_info.policy; + ptinfo->pth_run_state = basic_info.run_state; + ptinfo->pth_flags = basic_info.flags; + ptinfo->pth_sleep_time = basic_info.sleep_time; + ptinfo->pth_curpri = thact->sched_pri; + ptinfo->pth_priority = thact->priority; + ptinfo->pth_maxpriority = thact->max_priority; + + err = 0; + goto out; + } + thact = (thread_t)queue_next(&thact->task_threads); + } + err = 1; + +out: + task_unlock(task); + return(err); +} + +int +fill_taskthreadlist(task_t task, void * buffer, int thcount) +{ + int numthr=0; + thread_t thact; + uint64_t * uptr; + uint64_t thaddr; + + uptr = (uint64_t *)buffer; + + task_lock(task); + + for (thact = (thread_t)queue_first(&task->threads); + !queue_end(&task->threads, (queue_entry_t)thact); ) { +#if defined(__ppc__) + thaddr = thact->machine.cthread_self; +#elif defined 
(__i386__) + thaddr = thact->machine.pcb->cthread_self; +#else +#error architecture not supported +#endif + *uptr++ = thaddr; + numthr++; + if (numthr >= thcount) + goto out; + thact = (thread_t)queue_next(&thact->task_threads); + } + +out: + task_unlock(task); + return(numthr * sizeof(uint64_t)); + +} + +int +get_numthreads(task_t task) +{ + return(task->thread_count); +} + diff --git a/osfmk/kern/clock.c b/osfmk/kern/clock.c index 25fecf46f..4085c5883 100644 --- a/osfmk/kern/clock.c +++ b/osfmk/kern/clock.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -23,109 +23,89 @@ * @OSF_COPYRIGHT@ */ /* - * File: kern/clock.c - * Purpose: Routines for the creation and use of kernel - * alarm clock services. This file and the ipc - * routines in kern/ipc_clock.c constitute the - * machine-independent clock service layer. */ -#include - #include -#include -#include -#include -#include -#include #include -#include #include #include #include -#include #include -#include +#include + +#include -#include -#include +#include #include -#include #include -#include -#include -#include +decl_simple_lock_data(static,clock_lock) /* - * Exported interface + * Time of day (calendar) variables. + * + * Algorithm: + * + * TOD <- (seconds + epoch, fraction) <- CONV(current absolute time + offset) + * + * where CONV converts absolute time units into seconds and a fraction. */ +static struct clock_calend { + uint64_t epoch; + uint64_t offset; +} clock_calend; -#include -#include +/* + * Calendar adjustment variables and values. 
+ */ +#define calend_adjperiod (NSEC_PER_SEC / 100) /* adjustment period, ns */ +#define calend_adjskew (40 * NSEC_PER_USEC) /* "standard" skew, ns / period */ +#define calend_adjbig (NSEC_PER_SEC) /* use 10x skew above adjbig ns */ + +static uint64_t calend_adjstart; /* Absolute time value for start of this adjustment period */ +static uint32_t calend_adjoffset; /* Absolute time offset for this adjustment period as absolute value */ -/* local data declarations */ -decl_simple_lock_data(static,ClockLock) /* clock system synchronization */ -static struct zone *alarm_zone; /* zone for user alarms */ -static struct alarm *alrmfree; /* alarm free list pointer */ -static struct alarm *alrmdone; /* alarm done list pointer */ -static long alrm_seqno; /* uniquely identifies alarms */ -static thread_call_data_t alarm_deliver; +static int32_t calend_adjdelta; /* Nanosecond time delta for this adjustment period */ +static int64_t calend_adjtotal; /* Nanosecond remaining total adjustment */ -decl_simple_lock_data(static,calend_adjlock) +static uint64_t calend_adjdeadline; /* Absolute time value for next adjustment period */ +static uint32_t calend_adjinterval; /* Absolute time interval of adjustment period */ static timer_call_data_t calend_adjcall; -static uint64_t calend_adjdeadline; +static uint32_t calend_adjactive; + +static uint32_t calend_set_adjustment( + int32_t *secs, + int32_t *microsecs); + +static void calend_adjust_call(void); +static uint32_t calend_adjust(void); static thread_call_data_t calend_wakecall; -/* external declarations */ -extern struct clock clock_list[]; -extern int clock_count; - -/* local clock subroutines */ -static -void flush_alarms( - clock_t clock); - -static -void post_alarm( - clock_t clock, - alarm_t alarm); - -static -int check_time( - alarm_type_t alarm_type, - mach_timespec_t *alarm_time, - mach_timespec_t *clock_time); - -static -void clock_alarm_deliver( - thread_call_param_t p0, - thread_call_param_t p1); - -static -void 
calend_adjust_call( - timer_call_param_t p0, - timer_call_param_t p1); - -static -void calend_dowakeup( - thread_call_param_t p0, - thread_call_param_t p1); +extern void IOKitResetTime(void); -/* - * Macros to lock/unlock clock system. - */ -#define LOCK_CLOCK(s) \ - s = splclock(); \ - simple_lock(&ClockLock); +static uint64_t clock_boottime; /* Seconds boottime epoch */ -#define UNLOCK_CLOCK(s) \ - simple_unlock(&ClockLock); \ - splx(s); +#define TIME_ADD(rsecs, secs, rfrac, frac, unit) \ +MACRO_BEGIN \ + if (((rfrac) += (frac)) >= (unit)) { \ + (rfrac) -= (unit); \ + (rsecs) += 1; \ + } \ + (rsecs) += (secs); \ +MACRO_END + +#define TIME_SUB(rsecs, secs, rfrac, frac, unit) \ +MACRO_BEGIN \ + if ((int32_t)((rfrac) -= (frac)) < 0) { \ + (rfrac) += (unit); \ + (rsecs) -= 1; \ + } \ + (rsecs) -= (secs); \ +MACRO_END /* * clock_config: @@ -135,37 +115,17 @@ void calend_dowakeup( void clock_config(void) { - clock_t clock; - register int i; - - assert(cpu_number() == master_cpu); - - simple_lock_init(&ClockLock, 0); - thread_call_setup(&alarm_deliver, clock_alarm_deliver, NULL); + simple_lock_init(&clock_lock, 0); - simple_lock_init(&calend_adjlock, 0); - timer_call_setup(&calend_adjcall, calend_adjust_call, NULL); + timer_call_setup(&calend_adjcall, (timer_call_func_t)calend_adjust_call, NULL); + thread_call_setup(&calend_wakecall, (thread_call_func_t)IOKitResetTime, NULL); - thread_call_setup(&calend_wakecall, calend_dowakeup, NULL); - - /* - * Configure clock devices. - */ - for (i = 0; i < clock_count; i++) { - clock = &clock_list[i]; - if (clock->cl_ops) { - if ((*clock->cl_ops->c_config)() == 0) - clock->cl_ops = 0; - } - } + clock_oldconfig(); /* * Initialize the timer callouts. */ timer_call_initialize(); - - /* start alarm sequence numbers at 0 */ - alrm_seqno = 0; } /* @@ -176,696 +136,506 @@ clock_config(void) void clock_init(void) { - clock_t clock; - register int i; - - /* - * Initialize basic clock structures. 
- */ - for (i = 0; i < clock_count; i++) { - clock = &clock_list[i]; - if (clock->cl_ops && clock->cl_ops->c_init) - (*clock->cl_ops->c_init)(); - } + clock_oldinit(); } /* - * Called by machine dependent code - * to initialize areas dependent on the - * timebase value. May be called multiple - * times during start up. + * clock_timebase_init: + * + * Called by machine dependent code + * to initialize areas dependent on the + * timebase value. May be called multiple + * times during start up. */ void clock_timebase_init(void) { - sched_timebase_init(); -} - -/* - * Initialize the clock ipc service facility. - */ -void -clock_service_create(void) -{ - clock_t clock; - register int i; + uint64_t abstime; - /* - * Initialize ipc clock services. - */ - for (i = 0; i < clock_count; i++) { - clock = &clock_list[i]; - if (clock->cl_ops) { - ipc_clock_init(clock); - ipc_clock_enable(clock); - } - } + nanoseconds_to_absolutetime(calend_adjperiod, &abstime); + calend_adjinterval = abstime; - /* - * Perform miscellaneous late - * initialization. - */ - i = sizeof(struct alarm); - alarm_zone = zinit(i, (4096/i)*i, 10*i, "alarms"); + sched_timebase_init(); } /* - * Get the service port on a clock. + * mach_timebase_info_trap: + * + * User trap returns timebase constant. */ kern_return_t -host_get_clock_service( - host_t host, - clock_id_t clock_id, - clock_t *clock) /* OUT */ +mach_timebase_info_trap( + struct mach_timebase_info_trap_args *args) { - if (host == HOST_NULL || clock_id < 0 || clock_id >= clock_count) { - *clock = CLOCK_NULL; - return (KERN_INVALID_ARGUMENT); - } + mach_vm_address_t out_info_addr = args->info; + mach_timebase_info_data_t info; - *clock = &clock_list[clock_id]; - if ((*clock)->cl_ops == 0) - return (KERN_FAILURE); - return (KERN_SUCCESS); -} + clock_timebase_info(&info); -/* - * Get the control port on a clock. 
- */ -kern_return_t -host_get_clock_control( - host_priv_t host_priv, - clock_id_t clock_id, - clock_t *clock) /* OUT */ -{ - if (host_priv == HOST_PRIV_NULL || clock_id < 0 || clock_id >= clock_count) { - *clock = CLOCK_NULL; - return (KERN_INVALID_ARGUMENT); - } + copyout((void *)&info, out_info_addr, sizeof (info)); - *clock = &clock_list[clock_id]; - if ((*clock)->cl_ops == 0) - return (KERN_FAILURE); return (KERN_SUCCESS); } /* - * Get the current clock time. + * Calendar routines. */ -kern_return_t -clock_get_time( - clock_t clock, - mach_timespec_t *cur_time) /* OUT */ -{ - if (clock == CLOCK_NULL) - return (KERN_INVALID_ARGUMENT); - return ((*clock->cl_ops->c_gettime)(cur_time)); -} /* - * Get clock attributes. + * clock_get_calendar_microtime: + * + * Returns the current calendar value, + * microseconds as the fraction. */ -kern_return_t -clock_get_attributes( - clock_t clock, - clock_flavor_t flavor, - clock_attr_t attr, /* OUT */ - mach_msg_type_number_t *count) /* IN/OUT */ +void +clock_get_calendar_microtime( + uint32_t *secs, + uint32_t *microsecs) { - if (clock == CLOCK_NULL) - return (KERN_INVALID_ARGUMENT); - if (clock->cl_ops->c_getattr) - return(clock->cl_ops->c_getattr(flavor, attr, count)); - else - return (KERN_FAILURE); -} + uint64_t now; + spl_t s; -/* - * Set the current clock time. - */ -kern_return_t -clock_set_time( - clock_t clock, - mach_timespec_t new_time) -{ - mach_timespec_t *clock_time; + s = splclock(); + simple_lock(&clock_lock); - if (clock == CLOCK_NULL) - return (KERN_INVALID_ARGUMENT); - if (clock->cl_ops->c_settime == NULL) - return (KERN_FAILURE); - clock_time = &new_time; - if (BAD_MACH_TIMESPEC(clock_time)) - return (KERN_INVALID_VALUE); + now = mach_absolute_time(); - /* - * Flush all outstanding alarms. - */ - flush_alarms(clock); + if (calend_adjdelta < 0) { + uint32_t t32; - /* - * Set the new time. 
- */ - return (clock->cl_ops->c_settime(clock_time)); + if (now > calend_adjstart) { + t32 = now - calend_adjstart; + + if (t32 > calend_adjoffset) + now -= calend_adjoffset; + else + now = calend_adjstart; + } + } + + now += clock_calend.offset; + + absolutetime_to_microtime(now, secs, microsecs); + + *secs += clock_calend.epoch; + + simple_unlock(&clock_lock); + splx(s); } /* - * Set the clock alarm resolution. + * clock_get_calendar_nanotime: + * + * Returns the current calendar value, + * nanoseconds as the fraction. + * + * Since we do not have an interface to + * set the calendar with resolution greater + * than a microsecond, we honor that here. */ -kern_return_t -clock_set_attributes( - clock_t clock, - clock_flavor_t flavor, - clock_attr_t attr, - mach_msg_type_number_t count) +void +clock_get_calendar_nanotime( + uint32_t *secs, + uint32_t *nanosecs) { - if (clock == CLOCK_NULL) - return (KERN_INVALID_ARGUMENT); - if (clock->cl_ops->c_setattr) - return (clock->cl_ops->c_setattr(flavor, attr, count)); - else - return (KERN_FAILURE); + uint64_t now; + spl_t s; + + s = splclock(); + simple_lock(&clock_lock); + + now = mach_absolute_time(); + + if (calend_adjdelta < 0) { + uint32_t t32; + + if (now > calend_adjstart) { + t32 = now - calend_adjstart; + + if (t32 > calend_adjoffset) + now -= calend_adjoffset; + else + now = calend_adjstart; + } + } + + now += clock_calend.offset; + + absolutetime_to_microtime(now, secs, nanosecs); + *nanosecs *= NSEC_PER_USEC; + + *secs += clock_calend.epoch; + + simple_unlock(&clock_lock); + splx(s); } /* - * Setup a clock alarm. + * clock_gettimeofday: + * + * Kernel interface for commpage implementation of + * gettimeofday() syscall. + * + * Returns the current calendar value, and updates the + * commpage info as appropriate. Because most calls to + * gettimeofday() are handled in user mode by the commpage, + * this routine should be used infrequently. 
*/ -kern_return_t -clock_alarm( - clock_t clock, - alarm_type_t alarm_type, - mach_timespec_t alarm_time, - ipc_port_t alarm_port, - mach_msg_type_name_t alarm_port_type) +void +clock_gettimeofday( + uint32_t *secs, + uint32_t *microsecs) { - alarm_t alarm; - mach_timespec_t clock_time; - int chkstat; - kern_return_t reply_code; - spl_t s; - - if (clock == CLOCK_NULL) - return (KERN_INVALID_ARGUMENT); - if (clock->cl_ops->c_setalrm == 0) - return (KERN_FAILURE); - if (IP_VALID(alarm_port) == 0) - return (KERN_INVALID_CAPABILITY); + uint64_t now; + spl_t s; - /* - * Check alarm parameters. If parameters are invalid, - * send alarm message immediately. - */ - (*clock->cl_ops->c_gettime)(&clock_time); - chkstat = check_time(alarm_type, &alarm_time, &clock_time); - if (chkstat <= 0) { - reply_code = (chkstat < 0 ? KERN_INVALID_VALUE : KERN_SUCCESS); - clock_alarm_reply(alarm_port, alarm_port_type, - reply_code, alarm_type, clock_time); - return (KERN_SUCCESS); + s = splclock(); + simple_lock(&clock_lock); + + now = mach_absolute_time(); + + if (calend_adjdelta >= 0) { + clock_gettimeofday_set_commpage(now, clock_calend.epoch, clock_calend.offset, secs, microsecs); } + else { + uint32_t t32; - /* - * Get alarm and add to clock alarm list. 
- */ + if (now > calend_adjstart) { + t32 = now - calend_adjstart; + + if (t32 > calend_adjoffset) + now -= calend_adjoffset; + else + now = calend_adjstart; + } + + now += clock_calend.offset; - LOCK_CLOCK(s); - if ((alarm = alrmfree) == 0) { - UNLOCK_CLOCK(s); - alarm = (alarm_t) zalloc(alarm_zone); - if (alarm == 0) - return (KERN_RESOURCE_SHORTAGE); - LOCK_CLOCK(s); + absolutetime_to_microtime(now, secs, microsecs); + + *secs += clock_calend.epoch; } - else - alrmfree = alarm->al_next; - - alarm->al_status = ALARM_CLOCK; - alarm->al_time = alarm_time; - alarm->al_type = alarm_type; - alarm->al_port = alarm_port; - alarm->al_port_type = alarm_port_type; - alarm->al_clock = clock; - alarm->al_seqno = alrm_seqno++; - post_alarm(clock, alarm); - UNLOCK_CLOCK(s); - return (KERN_SUCCESS); + simple_unlock(&clock_lock); + splx(s); } /* - * Sleep on a clock. System trap. User-level libmach clock_sleep - * interface call takes a mach_timespec_t sleep_time argument which it - * converts to sleep_sec and sleep_nsec arguments which are then - * passed to clock_sleep_trap. + * clock_set_calendar_microtime: + * + * Sets the current calendar value by + * recalculating the epoch and offset + * from the system clock. + * + * Also adjusts the boottime to keep the + * value consistent, writes the new + * calendar value to the platform clock, + * and sends calendar change notifications. */ -kern_return_t -clock_sleep_trap( - struct clock_sleep_trap_args *args) +void +clock_set_calendar_microtime( + uint32_t secs, + uint32_t microsecs) { - mach_port_name_t clock_name = args->clock_name; - sleep_type_t sleep_type = args->sleep_type; - int sleep_sec = args->sleep_sec; - int sleep_nsec = args->sleep_nsec; - mach_vm_address_t wakeup_time_addr = args->wakeup_time; - clock_t clock; - mach_timespec_t swtime; - kern_return_t rvalue; + uint32_t sys, microsys; + uint32_t newsecs; + spl_t s; - /* - * Convert the trap parameters. 
- */ - if (clock_name != MACH_PORT_NULL) - clock = port_name_to_clock(clock_name); - else - clock = &clock_list[SYSTEM_CLOCK]; + newsecs = (microsecs < 500*USEC_PER_SEC)? + secs: secs + 1; + + s = splclock(); + simple_lock(&clock_lock); - swtime.tv_sec = sleep_sec; - swtime.tv_nsec = sleep_nsec; + commpage_set_timestamp(0,0,0); /* - * Call the actual clock_sleep routine. + * Calculate the new calendar epoch based on + * the new value and the system clock. */ - rvalue = clock_sleep_internal(clock, sleep_type, &swtime); + clock_get_system_microtime(&sys, &microsys); + TIME_SUB(secs, sys, microsecs, microsys, USEC_PER_SEC); /* - * Return current time as wakeup time. + * Adjust the boottime based on the delta. */ - if (rvalue != KERN_INVALID_ARGUMENT && rvalue != KERN_FAILURE) { - copyout((char *)&swtime, wakeup_time_addr, sizeof(mach_timespec_t)); - } - return (rvalue); -} - -/* - * Kernel internally callable clock sleep routine. The calling - * thread is suspended until the requested sleep time is reached. - */ -kern_return_t -clock_sleep_internal( - clock_t clock, - sleep_type_t sleep_type, - mach_timespec_t *sleep_time) -{ - alarm_t alarm; - mach_timespec_t clock_time; - kern_return_t rvalue; - int chkstat; - spl_t s; - - if (clock == CLOCK_NULL) - return (KERN_INVALID_ARGUMENT); - if (clock->cl_ops->c_setalrm == 0) - return (KERN_FAILURE); + clock_boottime += secs - clock_calend.epoch; /* - * Check sleep parameters. If parameters are invalid - * return an error, otherwise post alarm request. + * Set the new calendar epoch. */ - (*clock->cl_ops->c_gettime)(&clock_time); + clock_calend.epoch = secs; + nanoseconds_to_absolutetime((uint64_t)microsecs * NSEC_PER_USEC, &clock_calend.offset); - chkstat = check_time(sleep_type, sleep_time, &clock_time); - if (chkstat < 0) - return (KERN_INVALID_VALUE); - rvalue = KERN_SUCCESS; - if (chkstat > 0) { - wait_result_t wait_result; + /* + * Cancel any adjustment in progress. 
+ */ + calend_adjdelta = calend_adjtotal = 0; - /* - * Get alarm and add to clock alarm list. - */ + simple_unlock(&clock_lock); - LOCK_CLOCK(s); - if ((alarm = alrmfree) == 0) { - UNLOCK_CLOCK(s); - alarm = (alarm_t) zalloc(alarm_zone); - if (alarm == 0) - return (KERN_RESOURCE_SHORTAGE); - LOCK_CLOCK(s); - } - else - alrmfree = alarm->al_next; + /* + * Set the new value for the platform clock. + */ + PESetGMTTimeOfDay(newsecs); - /* - * Wait for alarm to occur. - */ - wait_result = assert_wait((event_t)alarm, THREAD_ABORTSAFE); - if (wait_result == THREAD_WAITING) { - alarm->al_time = *sleep_time; - alarm->al_status = ALARM_SLEEP; - post_alarm(clock, alarm); - UNLOCK_CLOCK(s); - - wait_result = thread_block(THREAD_CONTINUE_NULL); - - /* - * Note if alarm expired normally or whether it - * was aborted. If aborted, delete alarm from - * clock alarm list. Return alarm to free list. - */ - LOCK_CLOCK(s); - if (alarm->al_status != ALARM_DONE) { - assert(wait_result != THREAD_AWAKENED); - if (((alarm->al_prev)->al_next = alarm->al_next) != NULL) - (alarm->al_next)->al_prev = alarm->al_prev; - rvalue = KERN_ABORTED; - } - *sleep_time = alarm->al_time; - alarm->al_status = ALARM_FREE; - } else { - assert(wait_result == THREAD_INTERRUPTED); - assert(alarm->al_status == ALARM_FREE); - rvalue = KERN_ABORTED; - } - alarm->al_next = alrmfree; - alrmfree = alarm; - UNLOCK_CLOCK(s); - } - else - *sleep_time = clock_time; + splx(s); - return (rvalue); + /* + * Send host notifications. + */ + host_notify_calendar_change(); } /* - * CLOCK INTERRUPT SERVICE ROUTINES. - */ - -/* - * Service clock alarm interrupts. Called from machine dependent - * layer at splclock(). The clock_id argument specifies the clock, - * and the clock_time argument gives that clock's current time. + * clock_initialize_calendar: + * + * Set the calendar and related clocks + * from the platform clock at boot or + * wake event. + * + * Also sends host notifications. 
*/ void -clock_alarm_intr( - clock_id_t clock_id, - mach_timespec_t *clock_time) +clock_initialize_calendar(void) { - clock_t clock; - register alarm_t alrm1; - register alarm_t alrm2; - mach_timespec_t *alarm_time; - spl_t s; + uint32_t sys, microsys; + uint32_t microsecs = 0, secs = PEGetGMTTimeOfDay(); + spl_t s; - clock = &clock_list[clock_id]; + s = splclock(); + simple_lock(&clock_lock); - /* - * Update clock alarm list. All alarms that are due are moved - * to the alarmdone list to be serviced by the alarm_thread. - */ + commpage_set_timestamp(0,0,0); - LOCK_CLOCK(s); - alrm1 = (alarm_t) &clock->cl_alarm; - while ((alrm2 = alrm1->al_next) != NULL) { - alarm_time = &alrm2->al_time; - if (CMP_MACH_TIMESPEC(alarm_time, clock_time) > 0) - break; + if ((int32_t)secs >= (int32_t)clock_boottime) { + /* + * Initialize the boot time based on the platform clock. + */ + if (clock_boottime == 0) + clock_boottime = secs; /* - * Alarm has expired, so remove it from the - * clock alarm list. - */ - if ((alrm1->al_next = alrm2->al_next) != NULL) - (alrm1->al_next)->al_prev = alrm1; + * Calculate the new calendar epoch based on + * the platform clock and the system clock. + */ + clock_get_system_microtime(&sys, &microsys); + TIME_SUB(secs, sys, microsecs, microsys, USEC_PER_SEC); /* - * If a clock_sleep() alarm, wakeup the thread - * which issued the clock_sleep() call. + * Set the new calendar epoch. */ - if (alrm2->al_status == ALARM_SLEEP) { - alrm2->al_next = 0; - alrm2->al_status = ALARM_DONE; - alrm2->al_time = *clock_time; - thread_wakeup((event_t)alrm2); - } + clock_calend.epoch = secs; + nanoseconds_to_absolutetime((uint64_t)microsecs * NSEC_PER_USEC, &clock_calend.offset); - /* - * If a clock_alarm() alarm, place the alarm on - * the alarm done list and schedule the alarm - * delivery mechanism. + /* + * Cancel any adjustment in progress. 
*/ - else { - assert(alrm2->al_status == ALARM_CLOCK); - if ((alrm2->al_next = alrmdone) != NULL) - alrmdone->al_prev = alrm2; - else - thread_call_enter(&alarm_deliver); - alrm2->al_prev = (alarm_t) &alrmdone; - alrmdone = alrm2; - alrm2->al_status = ALARM_DONE; - alrm2->al_time = *clock_time; - } + calend_adjdelta = calend_adjtotal = 0; } + simple_unlock(&clock_lock); + splx(s); + /* - * Setup the clock dependent layer to deliver another - * interrupt for the next pending alarm. + * Send host notifications. */ - if (alrm2) - (*clock->cl_ops->c_setalrm)(alarm_time); - UNLOCK_CLOCK(s); + host_notify_calendar_change(); } /* - * ALARM DELIVERY ROUTINES. + * clock_get_boottime_nanotime: + * + * Return the boottime, used by sysctl. */ - -static void -clock_alarm_deliver( - __unused thread_call_param_t p0, - __unused thread_call_param_t p1) +void +clock_get_boottime_nanotime( + uint32_t *secs, + uint32_t *nanosecs) { - register alarm_t alrm; - kern_return_t code; - spl_t s; - - LOCK_CLOCK(s); - while ((alrm = alrmdone) != NULL) { - if ((alrmdone = alrm->al_next) != NULL) - alrmdone->al_prev = (alarm_t) &alrmdone; - UNLOCK_CLOCK(s); - - code = (alrm->al_status == ALARM_DONE? KERN_SUCCESS: KERN_ABORTED); - if (alrm->al_port != IP_NULL) { - /* Deliver message to designated port */ - if (IP_VALID(alrm->al_port)) { - clock_alarm_reply(alrm->al_port, alrm->al_port_type, code, - alrm->al_type, alrm->al_time); - } - - LOCK_CLOCK(s); - alrm->al_status = ALARM_FREE; - alrm->al_next = alrmfree; - alrmfree = alrm; - } - else - panic("clock_alarm_deliver"); - } - - UNLOCK_CLOCK(s); + *secs = clock_boottime; + *nanosecs = 0; } /* - * CLOCK PRIVATE SERVICING SUBROUTINES. - */ - -/* - * Flush all pending alarms on a clock. All alarms - * are activated and timestamped correctly, so any - * programs waiting on alarms/threads will proceed - * with accurate information. + * clock_adjtime: + * + * Interface to adjtime() syscall. 
+ * + * Calculates adjustment variables and + * initiates adjustment. */ -static void -flush_alarms( - clock_t clock) +clock_adjtime( + int32_t *secs, + int32_t *microsecs) { - register alarm_t alrm1, alrm2; - spl_t s; + uint32_t interval; + spl_t s; - /* - * Flush all outstanding alarms. - */ - LOCK_CLOCK(s); - alrm1 = (alarm_t) &clock->cl_alarm; - while ((alrm2 = alrm1->al_next) != NULL) { - /* - * Remove alarm from the clock alarm list. - */ - if ((alrm1->al_next = alrm2->al_next) != NULL) - (alrm1->al_next)->al_prev = alrm1; + s = splclock(); + simple_lock(&clock_lock); - /* - * If a clock_sleep() alarm, wakeup the thread - * which issued the clock_sleep() call. - */ - if (alrm2->al_status == ALARM_SLEEP) { - alrm2->al_next = 0; - thread_wakeup((event_t)alrm2); - } - else { - /* - * If a clock_alarm() alarm, place the alarm on - * the alarm done list and wakeup the dedicated - * kernel alarm_thread to service the alarm. - */ - assert(alrm2->al_status == ALARM_CLOCK); - if ((alrm2->al_next = alrmdone) != NULL) - alrmdone->al_prev = alrm2; - else - thread_wakeup((event_t)&alrmdone); - alrm2->al_prev = (alarm_t) &alrmdone; - alrmdone = alrm2; - } + interval = calend_set_adjustment(secs, microsecs); + if (interval != 0) { + calend_adjdeadline = mach_absolute_time() + interval; + if (!timer_call_enter(&calend_adjcall, calend_adjdeadline)) + calend_adjactive++; } - UNLOCK_CLOCK(s); + else + if (timer_call_cancel(&calend_adjcall)) + calend_adjactive--; + + simple_unlock(&clock_lock); + splx(s); } -/* - * Post an alarm on a clock's active alarm list. The alarm is - * inserted in time-order into the clock's active alarm list. - * Always called from within a LOCK_CLOCK() code section. 
- */ -static -void -post_alarm( - clock_t clock, - alarm_t alarm) +static uint32_t +calend_set_adjustment( + int32_t *secs, + int32_t *microsecs) { - register alarm_t alrm1, alrm2; - mach_timespec_t *alarm_time; - mach_timespec_t *queue_time; + uint64_t now, t64; + int64_t total, ototal; + uint32_t interval = 0; - /* - * Traverse alarm list until queue time is greater - * than alarm time, then insert alarm. - */ - alarm_time = &alarm->al_time; - alrm1 = (alarm_t) &clock->cl_alarm; - while ((alrm2 = alrm1->al_next) != NULL) { - queue_time = &alrm2->al_time; - if (CMP_MACH_TIMESPEC(queue_time, alarm_time) > 0) - break; - alrm1 = alrm2; - } - alrm1->al_next = alarm; - alarm->al_next = alrm2; - alarm->al_prev = alrm1; - if (alrm2) - alrm2->al_prev = alarm; + total = (int64_t)*secs * NSEC_PER_SEC + *microsecs * NSEC_PER_USEC; - /* - * If the inserted alarm is the 'earliest' alarm, - * reset the device layer alarm time accordingly. - */ - if (clock->cl_alarm.al_next == alarm) - (*clock->cl_ops->c_setalrm)(alarm_time); -} + commpage_set_timestamp(0,0,0); -/* - * Check the validity of 'alarm_time' and 'alarm_type'. If either - * argument is invalid, return a negative value. If the 'alarm_time' - * is now, return a 0 value. If the 'alarm_time' is in the future, - * return a positive value. - */ -static -int -check_time( - alarm_type_t alarm_type, - mach_timespec_t *alarm_time, - mach_timespec_t *clock_time) -{ - int result; + now = mach_absolute_time(); - if (BAD_ALRMTYPE(alarm_type)) - return (-1); - if (BAD_MACH_TIMESPEC(alarm_time)) - return (-1); - if ((alarm_type & ALRMTYPE) == TIME_RELATIVE) - ADD_MACH_TIMESPEC(alarm_time, clock_time); + ototal = calend_adjtotal; - result = CMP_MACH_TIMESPEC(alarm_time, clock_time); + if (total != 0) { + int32_t delta = calend_adjskew; - return ((result >= 0)? 
result: 0); -} + if (total > 0) { + if (total > calend_adjbig) + delta *= 10; + if (delta > total) + delta = total; -mach_timespec_t -clock_get_system_value(void) -{ - clock_t clock = &clock_list[SYSTEM_CLOCK]; - mach_timespec_t value; + nanoseconds_to_absolutetime((uint64_t)delta, &t64); + calend_adjoffset = t64; + } + else { + if (total < -calend_adjbig) + delta *= 10; + delta = -delta; + if (delta < total) + delta = total; - (void) (*clock->cl_ops->c_gettime)(&value); + calend_adjstart = now; - return value; -} + nanoseconds_to_absolutetime((uint64_t)-delta, &t64); + calend_adjoffset = t64; + } -mach_timespec_t -clock_get_calendar_value(void) -{ - clock_t clock = &clock_list[CALENDAR_CLOCK]; - mach_timespec_t value = MACH_TIMESPEC_ZERO; + calend_adjtotal = total; + calend_adjdelta = delta; + + interval = calend_adjinterval; + } + else + calend_adjdelta = calend_adjtotal = 0; - (void) (*clock->cl_ops->c_gettime)(&value); + if (ototal != 0) { + *secs = ototal / NSEC_PER_SEC; + *microsecs = (ototal % NSEC_PER_SEC) / NSEC_PER_USEC; + } + else + *secs = *microsecs = 0; - return value; + return (interval); } -void -clock_deadline_for_periodic_event( - uint64_t interval, - uint64_t abstime, - uint64_t *deadline) +static void +calend_adjust_call(void) { - assert(interval != 0); + uint32_t interval; + spl_t s; - *deadline += interval; + s = splclock(); + simple_lock(&clock_lock); - if (*deadline <= abstime) { - *deadline = abstime + interval; - abstime = mach_absolute_time(); + if (--calend_adjactive == 0) { + interval = calend_adjust(); + if (interval != 0) { + clock_deadline_for_periodic_event(interval, mach_absolute_time(), + &calend_adjdeadline); - if (*deadline <= abstime) - *deadline = abstime + interval; + if (!timer_call_enter(&calend_adjcall, calend_adjdeadline)) + calend_adjactive++; + } } + + simple_unlock(&clock_lock); + splx(s); } -void -mk_timebase_info_trap( - struct mk_timebase_info_trap_args *args) +static uint32_t +calend_adjust(void) { - uint32_t 
*delta = args->delta; - uint32_t *abs_to_ns_numer = args->abs_to_ns_numer; - uint32_t *abs_to_ns_denom = args->abs_to_ns_denom; - uint32_t *proc_to_abs_numer = args->proc_to_abs_numer; - uint32_t *proc_to_abs_denom = args->proc_to_abs_denom; - mach_timebase_info_data_t info; - uint32_t one = 1; + uint64_t now, t64; + int32_t delta; + uint32_t interval = 0; - clock_timebase_info(&info); + commpage_set_timestamp(0,0,0); - copyout((void *)&one, CAST_USER_ADDR_T(delta), sizeof (uint32_t)); + now = mach_absolute_time(); - copyout((void *)&info.numer, CAST_USER_ADDR_T(abs_to_ns_numer), sizeof (uint32_t)); - copyout((void *)&info.denom, CAST_USER_ADDR_T(abs_to_ns_denom), sizeof (uint32_t)); + delta = calend_adjdelta; - copyout((void *)&one, CAST_USER_ADDR_T(proc_to_abs_numer), sizeof (uint32_t)); - copyout((void *)&one, CAST_USER_ADDR_T(proc_to_abs_denom), sizeof (uint32_t)); -} + if (delta > 0) { + clock_calend.offset += calend_adjoffset; -kern_return_t -mach_timebase_info_trap( - struct mach_timebase_info_trap_args *args) -{ - mach_vm_address_t out_info_addr = args->info; - mach_timebase_info_data_t info; + calend_adjtotal -= delta; + if (delta > calend_adjtotal) { + calend_adjdelta = delta = calend_adjtotal; - clock_timebase_info(&info); + nanoseconds_to_absolutetime((uint64_t)delta, &t64); + calend_adjoffset = t64; + } + } + else + if (delta < 0) { + clock_calend.offset -= calend_adjoffset; - copyout((void *)&info, out_info_addr, sizeof (info)); + calend_adjtotal -= delta; + if (delta < calend_adjtotal) { + calend_adjdelta = delta = calend_adjtotal; - return (KERN_SUCCESS); + nanoseconds_to_absolutetime((uint64_t)-delta, &t64); + calend_adjoffset = t64; + } + + if (calend_adjdelta != 0) + calend_adjstart = now; + } + + if (calend_adjdelta != 0) + interval = calend_adjinterval; + + return (interval); +} + +/* + * clock_wakeup_calendar: + * + * Interface to power management, used + * to initiate the reset of the calendar + * on wake from sleep event. 
+ */ +void +clock_wakeup_calendar(void) +{ + thread_call_enter(&calend_wakecall); } +/* + * Wait / delay routines. + */ static void mach_wait_until_continue( __unused void *parameter, @@ -889,9 +659,6 @@ mach_wait_until_trap( return ((wresult == THREAD_INTERRUPTED)? KERN_ABORTED: KERN_SUCCESS); } -/* - * Delay primitives. - */ void clock_delay_until( uint64_t deadline) @@ -931,69 +698,52 @@ delay( delay_for_interval((usec < 0)? -usec: usec, NSEC_PER_USEC); } +/* + * Miscellaneous routines. + */ void -clock_adjtime( - int32_t *secs, - int32_t *microsecs) +clock_interval_to_deadline( + uint32_t interval, + uint32_t scale_factor, + uint64_t *result) { - uint32_t interval; - spl_t s; - - s = splclock(); - simple_lock(&calend_adjlock); - - interval = clock_set_calendar_adjtime(secs, microsecs); - if (interval != 0) { - if (calend_adjdeadline >= interval) - calend_adjdeadline -= interval; - clock_deadline_for_periodic_event(interval, mach_absolute_time(), - &calend_adjdeadline); + uint64_t abstime; - timer_call_enter(&calend_adjcall, calend_adjdeadline); - } - else - timer_call_cancel(&calend_adjcall); + clock_interval_to_absolutetime_interval(interval, scale_factor, &abstime); - simple_unlock(&calend_adjlock); - splx(s); + *result = mach_absolute_time() + abstime; } -static void -calend_adjust_call( - __unused timer_call_param_t p0, - __unused timer_call_param_t p1) +void +clock_absolutetime_interval_to_deadline( + uint64_t abstime, + uint64_t *result) { - uint32_t interval; - spl_t s; - - s = splclock(); - simple_lock(&calend_adjlock); - - interval = clock_adjust_calendar(); - if (interval != 0) { - clock_deadline_for_periodic_event(interval, mach_absolute_time(), - &calend_adjdeadline); - - timer_call_enter(&calend_adjcall, calend_adjdeadline); - } - - simple_unlock(&calend_adjlock); - splx(s); + *result = mach_absolute_time() + abstime; } void -clock_wakeup_calendar(void) +clock_get_uptime( + uint64_t *result) { - thread_call_enter(&calend_wakecall); + *result = 
mach_absolute_time(); } -extern void IOKitResetTime(void); /* XXX */ - -static void -calend_dowakeup( - __unused thread_call_param_t p0, - __unused thread_call_param_t p1) +void +clock_deadline_for_periodic_event( + uint64_t interval, + uint64_t abstime, + uint64_t *deadline) { + assert(interval != 0); + + *deadline += interval; + + if (*deadline <= abstime) { + *deadline = abstime + interval; + abstime = mach_absolute_time(); - IOKitResetTime(); + if (*deadline <= abstime) + *deadline = abstime + interval; + } } diff --git a/osfmk/kern/clock.h b/osfmk/kern/clock.h index f90001360..ee52ad8f2 100644 --- a/osfmk/kern/clock.h +++ b/osfmk/kern/clock.h @@ -23,10 +23,6 @@ * @OSF_COPYRIGHT@ */ /* - * File: kern/clock.h - * Purpose: Data structures for the kernel alarm clock - * facility. This file is used only by kernel - * level clock facility routines. */ #ifndef _KERN_CLOCK_H_ @@ -44,44 +40,9 @@ #ifdef MACH_KERNEL_PRIVATE -/* - * Actual clock alarm structure. Used for user clock_sleep() and - * clock_alarm() calls. Alarms are allocated from the alarm free - * list and entered in time priority order into the active alarm - * chain of the target clock. 
- */ -struct alarm { - struct alarm *al_next; /* next alarm in chain */ - struct alarm *al_prev; /* previous alarm in chain */ - int al_status; /* alarm status */ - mach_timespec_t al_time; /* alarm time */ - struct { /* message alarm data */ - int type; /* alarm type */ - ipc_port_t port; /* alarm port */ - mach_msg_type_name_t - port_type; /* alarm port type */ - struct clock *clock; /* alarm clock */ - void *data; /* alarm data */ - } al_alrm; -#define al_type al_alrm.type -#define al_port al_alrm.port -#define al_port_type al_alrm.port_type -#define al_clock al_alrm.clock -#define al_data al_alrm.data - long al_seqno; /* alarm sequence number */ -}; -typedef struct alarm alarm_data_t; - -/* alarm status */ -#define ALARM_FREE 0 /* alarm is on free list */ -#define ALARM_SLEEP 1 /* active clock_sleep() */ -#define ALARM_CLOCK 2 /* active clock_alarm() */ -#define ALARM_DONE 4 /* alarm has expired */ - /* * Clock operations list structure. Contains vectors to machine - * dependent clock routines. The routines c_config, c_init, and - * c_gettime must be implemented for every clock device. + * dependent clock routines. */ struct clock_ops { int (*c_config)(void); /* configuration */ @@ -91,37 +52,22 @@ struct clock_ops { kern_return_t (*c_gettime)( /* get time */ mach_timespec_t *cur_time); - kern_return_t (*c_settime)( /* set time */ - mach_timespec_t *clock_time); - kern_return_t (*c_getattr)( /* get attributes */ clock_flavor_t flavor, clock_attr_t attr, mach_msg_type_number_t *count); - - kern_return_t (*c_setattr)( /* set attributes */ - clock_flavor_t flavor, - clock_attr_t attr, - mach_msg_type_number_t count); - - void (*c_setalrm)( /* set next alarm */ - mach_timespec_t *alarm_time); }; typedef struct clock_ops *clock_ops_t; typedef struct clock_ops clock_ops_data_t; /* * Actual clock object data structure. Contains the machine - * dependent operations list, clock operations ports, and a - * chain of pending alarms. 
+ * dependent operations list and clock operation ports. */ struct clock { clock_ops_t cl_ops; /* operations list */ struct ipc_port *cl_service; /* service port */ struct ipc_port *cl_control; /* control port */ - struct { /* alarm chain head */ - struct alarm *al_next; - } cl_alarm; }; typedef struct clock clock_data_t; @@ -129,11 +75,13 @@ typedef struct clock clock_data_t; * Configure the clock system. */ extern void clock_config(void); +extern void clock_oldconfig(void); /* * Initialize the clock system. */ extern void clock_init(void); +extern void clock_oldinit(void); extern void clock_timebase_init(void); @@ -142,20 +90,6 @@ extern void clock_timebase_init(void); */ extern void clock_service_create(void); -/* - * Service clock alarm interrupts. Called from machine dependent - * layer at splclock(). The clock_id argument specifies the clock, - * and the clock_time argument gives that clock's current time. - */ -extern void clock_alarm_intr( - clock_id_t clock_id, - mach_timespec_t *clock_time); - -extern kern_return_t clock_sleep_internal( - clock_t clock, - sleep_type_t sleep_type, - mach_timespec_t *sleep_time); - typedef void (*clock_timer_func_t)( uint64_t timestamp); @@ -165,11 +99,12 @@ extern void clock_set_timer_func( extern void clock_set_timer_deadline( uint64_t deadline); -extern uint32_t clock_set_calendar_adjtime( - int32_t *secs, - int32_t *microsecs); - -extern uint32_t clock_adjust_calendar(void); +extern void clock_gettimeofday_set_commpage( + uint64_t abstime, + uint64_t epoch, + uint64_t offset, + uint32_t *secs, + uint32_t *microsecs); extern void machine_delay_until( uint64_t deadline); @@ -188,6 +123,16 @@ extern void absolutetime_to_microtime( uint32_t *secs, uint32_t *microsecs); +extern void absolutetime_to_nanotime( + uint64_t abstime, + uint32_t *secs, + uint32_t *nanosecs); + +extern void nanotime_to_absolutetime( + uint32_t secs, + uint32_t nanosecs, + uint64_t *result); + #endif /* MACH_KERNEL_PRIVATE */ __BEGIN_DECLS diff --git 
a/osfmk/kern/clock_oldops.c b/osfmk/kern/clock_oldops.c new file mode 100644 index 000000000..ac03a7d9e --- /dev/null +++ b/osfmk/kern/clock_oldops.c @@ -0,0 +1,802 @@ +/* + * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * The contents of this file constitute Original Code as defined in and + * are subject to the Apple Public Source License Version 1.1 (the + * "License"). You may not use this file except in compliance with the + * License. Please obtain a copy of the License at + * http://www.apple.com/publicsource and read it before using this file. + * + * This Original Code and all software distributed under the License are + * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the + * License for the specific language governing rights and limitations + * under the License. + * + * @APPLE_LICENSE_HEADER_END@ + */ +/* + * @OSF_COPYRIGHT@ + */ +/* + * DEPRECATED INTERFACES - Should be removed + * + * Purpose: Routines for the creation and use of kernel + * alarm clock services. This file and the ipc + * routines in kern/ipc_clock.c constitute the + * machine-independent clock service layer. + */ + +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include +#include + +#include +#include +#include + +#include +#include + +/* + * Actual clock alarm structure. Used for user clock_sleep() and + * clock_alarm() calls. Alarms are allocated from the alarm free + * list and entered in time priority order into the active alarm + * chain of the target clock. 
+ */ +struct alarm { + struct alarm *al_next; /* next alarm in chain */ + struct alarm *al_prev; /* previous alarm in chain */ + int al_status; /* alarm status */ + mach_timespec_t al_time; /* alarm time */ + struct { /* message alarm data */ + int type; /* alarm type */ + ipc_port_t port; /* alarm port */ + mach_msg_type_name_t + port_type; /* alarm port type */ + struct clock *clock; /* alarm clock */ + void *data; /* alarm data */ + } al_alrm; +#define al_type al_alrm.type +#define al_port al_alrm.port +#define al_port_type al_alrm.port_type +#define al_clock al_alrm.clock +#define al_data al_alrm.data + long al_seqno; /* alarm sequence number */ +}; +typedef struct alarm alarm_data_t; + +/* alarm status */ +#define ALARM_FREE 0 /* alarm is on free list */ +#define ALARM_SLEEP 1 /* active clock_sleep() */ +#define ALARM_CLOCK 2 /* active clock_alarm() */ +#define ALARM_DONE 4 /* alarm has expired */ + +/* local data declarations */ +decl_simple_lock_data(static,alarm_lock) /* alarm synchronization */ +static struct zone *alarm_zone; /* zone for user alarms */ +static struct alarm *alrmfree; /* alarm free list pointer */ +static struct alarm *alrmdone; /* alarm done list pointer */ +static struct alarm *alrmlist; +static long alrm_seqno; /* uniquely identifies alarms */ +static thread_call_data_t alarm_done_call; +static timer_call_data_t alarm_expire_timer; + +extern struct clock clock_list[]; +extern int clock_count; + +static void post_alarm( + alarm_t alarm); + +static void set_alarm( + mach_timespec_t *alarm_time); + +static int check_time( + alarm_type_t alarm_type, + mach_timespec_t *alarm_time, + mach_timespec_t *clock_time); + +static void alarm_done(void); + +static void alarm_expire(void); + +static kern_return_t clock_sleep_internal( + clock_t clock, + sleep_type_t sleep_type, + mach_timespec_t *sleep_time); + +int rtclock_config(void); + +int rtclock_init(void); + +kern_return_t rtclock_gettime( + mach_timespec_t *cur_time); + +kern_return_t 
rtclock_getattr( + clock_flavor_t flavor, + clock_attr_t attr, + mach_msg_type_number_t *count); + +struct clock_ops sysclk_ops = { + rtclock_config, rtclock_init, + rtclock_gettime, + rtclock_getattr, +}; + +kern_return_t calend_gettime( + mach_timespec_t *cur_time); + +kern_return_t calend_getattr( + clock_flavor_t flavor, + clock_attr_t attr, + mach_msg_type_number_t *count); + +struct clock_ops calend_ops = { + 0, 0, + calend_gettime, + calend_getattr, +}; + +/* + * Macros to lock/unlock clock system. + */ +#define LOCK_ALARM(s) \ + s = splclock(); \ + simple_lock(&alarm_lock); + +#define UNLOCK_ALARM(s) \ + simple_unlock(&alarm_lock); \ + splx(s); + +void +clock_oldconfig(void) +{ + clock_t clock; + register int i; + + simple_lock_init(&alarm_lock, 0); + thread_call_setup(&alarm_done_call, (thread_call_func_t)alarm_done, NULL); + timer_call_setup(&alarm_expire_timer, (timer_call_func_t)alarm_expire, NULL); + + /* + * Configure clock devices. + */ + for (i = 0; i < clock_count; i++) { + clock = &clock_list[i]; + if (clock->cl_ops && clock->cl_ops->c_config) { + if ((*clock->cl_ops->c_config)() == 0) + clock->cl_ops = 0; + } + } + + /* start alarm sequence numbers at 0 */ + alrm_seqno = 0; +} + +void +clock_oldinit(void) +{ + clock_t clock; + register int i; + + /* + * Initialize basic clock structures. + */ + for (i = 0; i < clock_count; i++) { + clock = &clock_list[i]; + if (clock->cl_ops && clock->cl_ops->c_init) + (*clock->cl_ops->c_init)(); + } +} + +/* + * Initialize the clock ipc service facility. + */ +void +clock_service_create(void) +{ + clock_t clock; + register int i; + + /* + * Initialize ipc clock services. + */ + for (i = 0; i < clock_count; i++) { + clock = &clock_list[i]; + if (clock->cl_ops) { + ipc_clock_init(clock); + ipc_clock_enable(clock); + } + } + + /* + * Perform miscellaneous late + * initialization. + */ + i = sizeof(struct alarm); + alarm_zone = zinit(i, (4096/i)*i, 10*i, "alarms"); +} + +/* + * Get the service port on a clock. 
+ */ +kern_return_t +host_get_clock_service( + host_t host, + clock_id_t clock_id, + clock_t *clock) /* OUT */ +{ + if (host == HOST_NULL || clock_id < 0 || clock_id >= clock_count) { + *clock = CLOCK_NULL; + return (KERN_INVALID_ARGUMENT); + } + + *clock = &clock_list[clock_id]; + if ((*clock)->cl_ops == 0) + return (KERN_FAILURE); + return (KERN_SUCCESS); +} + +/* + * Get the control port on a clock. + */ +kern_return_t +host_get_clock_control( + host_priv_t host_priv, + clock_id_t clock_id, + clock_t *clock) /* OUT */ +{ + if (host_priv == HOST_PRIV_NULL || + clock_id < 0 || clock_id >= clock_count) { + *clock = CLOCK_NULL; + return (KERN_INVALID_ARGUMENT); + } + + *clock = &clock_list[clock_id]; + if ((*clock)->cl_ops == 0) + return (KERN_FAILURE); + return (KERN_SUCCESS); +} + +/* + * Get the current clock time. + */ +kern_return_t +clock_get_time( + clock_t clock, + mach_timespec_t *cur_time) /* OUT */ +{ + if (clock == CLOCK_NULL) + return (KERN_INVALID_ARGUMENT); + return ((*clock->cl_ops->c_gettime)(cur_time)); +} + +kern_return_t +rtclock_gettime( + mach_timespec_t *time) /* OUT */ +{ + clock_get_system_nanotime(&time->tv_sec, (uint32_t *)&time->tv_nsec); + + return (KERN_SUCCESS); +} + +kern_return_t +calend_gettime( + mach_timespec_t *time) /* OUT */ +{ + clock_get_calendar_nanotime(&time->tv_sec, (uint32_t *)&time->tv_nsec); + + return (KERN_SUCCESS); +} + +/* + * Get clock attributes. 
+ */ +kern_return_t +clock_get_attributes( + clock_t clock, + clock_flavor_t flavor, + clock_attr_t attr, /* OUT */ + mach_msg_type_number_t *count) /* IN/OUT */ +{ + if (clock == CLOCK_NULL) + return (KERN_INVALID_ARGUMENT); + if (clock->cl_ops->c_getattr) + return (clock->cl_ops->c_getattr(flavor, attr, count)); + return (KERN_FAILURE); +} + +kern_return_t +rtclock_getattr( + clock_flavor_t flavor, + clock_attr_t attr, /* OUT */ + mach_msg_type_number_t *count) /* IN/OUT */ +{ + if (*count != 1) + return (KERN_FAILURE); + + switch (flavor) { + + case CLOCK_GET_TIME_RES: /* >0 res */ + case CLOCK_ALARM_CURRES: /* =0 no alarm */ + case CLOCK_ALARM_MINRES: + case CLOCK_ALARM_MAXRES: + *(clock_res_t *) attr = NSEC_PER_SEC / 100; + break; + + default: + return (KERN_INVALID_VALUE); + } + + return (KERN_SUCCESS); +} + +kern_return_t +calend_getattr( + clock_flavor_t flavor, + clock_attr_t attr, /* OUT */ + mach_msg_type_number_t *count) /* IN/OUT */ +{ + if (*count != 1) + return (KERN_FAILURE); + + switch (flavor) { + + case CLOCK_GET_TIME_RES: /* >0 res */ + *(clock_res_t *) attr = NSEC_PER_SEC / 100; + break; + + case CLOCK_ALARM_CURRES: /* =0 no alarm */ + case CLOCK_ALARM_MINRES: + case CLOCK_ALARM_MAXRES: + *(clock_res_t *) attr = 0; + break; + + default: + return (KERN_INVALID_VALUE); + } + + return (KERN_SUCCESS); +} + +/* + * Set the current clock time. + */ +kern_return_t +clock_set_time( + clock_t clock, +__unused mach_timespec_t new_time) +{ + if (clock == CLOCK_NULL) + return (KERN_INVALID_ARGUMENT); + return (KERN_FAILURE); +} + +/* + * Set the clock alarm resolution. + */ +kern_return_t +clock_set_attributes( + clock_t clock, +__unused clock_flavor_t flavor, +__unused clock_attr_t attr, +__unused mach_msg_type_number_t count) +{ + if (clock == CLOCK_NULL) + return (KERN_INVALID_ARGUMENT); + return (KERN_FAILURE); +} + +/* + * Setup a clock alarm. 
+ */ +kern_return_t +clock_alarm( + clock_t clock, + alarm_type_t alarm_type, + mach_timespec_t alarm_time, + ipc_port_t alarm_port, + mach_msg_type_name_t alarm_port_type) +{ + alarm_t alarm; + mach_timespec_t clock_time; + int chkstat; + kern_return_t reply_code; + spl_t s; + + if (clock == CLOCK_NULL) + return (KERN_INVALID_ARGUMENT); + if (clock != &clock_list[SYSTEM_CLOCK]) + return (KERN_FAILURE); + if (IP_VALID(alarm_port) == 0) + return (KERN_INVALID_CAPABILITY); + + /* + * Check alarm parameters. If parameters are invalid, + * send alarm message immediately. + */ + (*clock->cl_ops->c_gettime)(&clock_time); + chkstat = check_time(alarm_type, &alarm_time, &clock_time); + if (chkstat <= 0) { + reply_code = (chkstat < 0 ? KERN_INVALID_VALUE : KERN_SUCCESS); + clock_alarm_reply(alarm_port, alarm_port_type, + reply_code, alarm_type, clock_time); + return (KERN_SUCCESS); + } + + /* + * Get alarm and add to clock alarm list. + */ + + LOCK_ALARM(s); + if ((alarm = alrmfree) == 0) { + UNLOCK_ALARM(s); + alarm = (alarm_t) zalloc(alarm_zone); + if (alarm == 0) + return (KERN_RESOURCE_SHORTAGE); + LOCK_ALARM(s); + } + else + alrmfree = alarm->al_next; + + alarm->al_status = ALARM_CLOCK; + alarm->al_time = alarm_time; + alarm->al_type = alarm_type; + alarm->al_port = alarm_port; + alarm->al_port_type = alarm_port_type; + alarm->al_clock = clock; + alarm->al_seqno = alrm_seqno++; + post_alarm(alarm); + UNLOCK_ALARM(s); + + return (KERN_SUCCESS); +} + +/* + * Sleep on a clock. System trap. User-level libmach clock_sleep + * interface call takes a mach_timespec_t sleep_time argument which it + * converts to sleep_sec and sleep_nsec arguments which are then + * passed to clock_sleep_trap. 
+ */ +kern_return_t +clock_sleep_trap( + struct clock_sleep_trap_args *args) +{ + mach_port_name_t clock_name = args->clock_name; + sleep_type_t sleep_type = args->sleep_type; + int sleep_sec = args->sleep_sec; + int sleep_nsec = args->sleep_nsec; + mach_vm_address_t wakeup_time_addr = args->wakeup_time; + clock_t clock; + mach_timespec_t swtime; + kern_return_t rvalue; + + /* + * Convert the trap parameters. + */ + if (clock_name == MACH_PORT_NULL) + clock = &clock_list[SYSTEM_CLOCK]; + else + clock = port_name_to_clock(clock_name); + + swtime.tv_sec = sleep_sec; + swtime.tv_nsec = sleep_nsec; + + /* + * Call the actual clock_sleep routine. + */ + rvalue = clock_sleep_internal(clock, sleep_type, &swtime); + + /* + * Return current time as wakeup time. + */ + if (rvalue != KERN_INVALID_ARGUMENT && rvalue != KERN_FAILURE) { + copyout((char *)&swtime, wakeup_time_addr, sizeof(mach_timespec_t)); + } + return (rvalue); +} + +static kern_return_t +clock_sleep_internal( + clock_t clock, + sleep_type_t sleep_type, + mach_timespec_t *sleep_time) +{ + alarm_t alarm; + mach_timespec_t clock_time; + kern_return_t rvalue; + int chkstat; + spl_t s; + + if (clock == CLOCK_NULL) + return (KERN_INVALID_ARGUMENT); + + if (clock != &clock_list[SYSTEM_CLOCK]) + return (KERN_FAILURE); + + /* + * Check sleep parameters. If parameters are invalid + * return an error, otherwise post alarm request. + */ + (*clock->cl_ops->c_gettime)(&clock_time); + + chkstat = check_time(sleep_type, sleep_time, &clock_time); + if (chkstat < 0) + return (KERN_INVALID_VALUE); + rvalue = KERN_SUCCESS; + if (chkstat > 0) { + wait_result_t wait_result; + + /* + * Get alarm and add to clock alarm list. + */ + + LOCK_ALARM(s); + if ((alarm = alrmfree) == 0) { + UNLOCK_ALARM(s); + alarm = (alarm_t) zalloc(alarm_zone); + if (alarm == 0) + return (KERN_RESOURCE_SHORTAGE); + LOCK_ALARM(s); + } + else + alrmfree = alarm->al_next; + + /* + * Wait for alarm to occur. 
+ */ + wait_result = assert_wait((event_t)alarm, THREAD_ABORTSAFE); + if (wait_result == THREAD_WAITING) { + alarm->al_time = *sleep_time; + alarm->al_status = ALARM_SLEEP; + post_alarm(alarm); + UNLOCK_ALARM(s); + + wait_result = thread_block(THREAD_CONTINUE_NULL); + + /* + * Note if alarm expired normally or whether it + * was aborted. If aborted, delete alarm from + * clock alarm list. Return alarm to free list. + */ + LOCK_ALARM(s); + if (alarm->al_status != ALARM_DONE) { + assert(wait_result != THREAD_AWAKENED); + if (((alarm->al_prev)->al_next = alarm->al_next) != NULL) + (alarm->al_next)->al_prev = alarm->al_prev; + rvalue = KERN_ABORTED; + } + *sleep_time = alarm->al_time; + alarm->al_status = ALARM_FREE; + } else { + assert(wait_result == THREAD_INTERRUPTED); + assert(alarm->al_status == ALARM_FREE); + rvalue = KERN_ABORTED; + } + alarm->al_next = alrmfree; + alrmfree = alarm; + UNLOCK_ALARM(s); + } + else + *sleep_time = clock_time; + + return (rvalue); +} + +/* + * Service clock alarm expirations. + */ +static void +alarm_expire(void) +{ + clock_t clock; + register alarm_t alrm1; + register alarm_t alrm2; + mach_timespec_t clock_time; + mach_timespec_t *alarm_time; + spl_t s; + + clock = &clock_list[SYSTEM_CLOCK]; + (*clock->cl_ops->c_gettime)(&clock_time); + + /* + * Update clock alarm list. Alarms that are due are moved + * to the alarmdone list to be serviced by a thread callout. + */ + LOCK_ALARM(s); + alrm1 = (alarm_t)&alrmlist; + while ((alrm2 = alrm1->al_next) != NULL) { + alarm_time = &alrm2->al_time; + if (CMP_MACH_TIMESPEC(alarm_time, &clock_time) > 0) + break; + + /* + * Alarm has expired, so remove it from the + * clock alarm list. + */ + if ((alrm1->al_next = alrm2->al_next) != NULL) + (alrm1->al_next)->al_prev = alrm1; + + /* + * If a clock_sleep() alarm, wakeup the thread + * which issued the clock_sleep() call. 
+ */ + if (alrm2->al_status == ALARM_SLEEP) { + alrm2->al_next = 0; + alrm2->al_status = ALARM_DONE; + alrm2->al_time = clock_time; + thread_wakeup((event_t)alrm2); + } + + /* + * If a clock_alarm() alarm, place the alarm on + * the alarm done list and schedule the alarm + * delivery mechanism. + */ + else { + assert(alrm2->al_status == ALARM_CLOCK); + if ((alrm2->al_next = alrmdone) != NULL) + alrmdone->al_prev = alrm2; + else + thread_call_enter(&alarm_done_call); + alrm2->al_prev = (alarm_t)&alrmdone; + alrmdone = alrm2; + alrm2->al_status = ALARM_DONE; + alrm2->al_time = clock_time; + } + } + + /* + * Setup to expire for the next pending alarm. + */ + if (alrm2) + set_alarm(alarm_time); + UNLOCK_ALARM(s); +} + +static void +alarm_done(void) +{ + register alarm_t alrm; + kern_return_t code; + spl_t s; + + LOCK_ALARM(s); + while ((alrm = alrmdone) != NULL) { + if ((alrmdone = alrm->al_next) != NULL) + alrmdone->al_prev = (alarm_t)&alrmdone; + UNLOCK_ALARM(s); + + code = (alrm->al_status == ALARM_DONE? KERN_SUCCESS: KERN_ABORTED); + if (alrm->al_port != IP_NULL) { + /* Deliver message to designated port */ + if (IP_VALID(alrm->al_port)) { + clock_alarm_reply(alrm->al_port, alrm->al_port_type, code, + alrm->al_type, alrm->al_time); + } + + LOCK_ALARM(s); + alrm->al_status = ALARM_FREE; + alrm->al_next = alrmfree; + alrmfree = alrm; + } + else + panic("clock_alarm_deliver"); + } + + UNLOCK_ALARM(s); +} + +/* + * Post an alarm on the active alarm list. + * + * Always called from within a LOCK_ALARM() code section. + */ +static void +post_alarm( + alarm_t alarm) +{ + register alarm_t alrm1, alrm2; + mach_timespec_t *alarm_time; + mach_timespec_t *queue_time; + + /* + * Traverse alarm list until queue time is greater + * than alarm time, then insert alarm. 
+ */ + alarm_time = &alarm->al_time; + alrm1 = (alarm_t)&alrmlist; + while ((alrm2 = alrm1->al_next) != NULL) { + queue_time = &alrm2->al_time; + if (CMP_MACH_TIMESPEC(queue_time, alarm_time) > 0) + break; + alrm1 = alrm2; + } + alrm1->al_next = alarm; + alarm->al_next = alrm2; + alarm->al_prev = alrm1; + if (alrm2) + alrm2->al_prev = alarm; + + /* + * If the inserted alarm is the 'earliest' alarm, + * reset the device layer alarm time accordingly. + */ + if (alrmlist == alarm) + set_alarm(alarm_time); +} + +static void +set_alarm( + mach_timespec_t *alarm_time) +{ + uint64_t abstime; + + nanotime_to_absolutetime(alarm_time->tv_sec, alarm_time->tv_nsec, &abstime); + timer_call_enter(&alarm_expire_timer, abstime); +} + +/* + * Check the validity of 'alarm_time' and 'alarm_type'. If either + * argument is invalid, return a negative value. If the 'alarm_time' + * is now, return a 0 value. If the 'alarm_time' is in the future, + * return a positive value. + */ +static int +check_time( + alarm_type_t alarm_type, + mach_timespec_t *alarm_time, + mach_timespec_t *clock_time) +{ + int result; + + if (BAD_ALRMTYPE(alarm_type)) + return (-1); + if (BAD_MACH_TIMESPEC(alarm_time)) + return (-1); + if ((alarm_type & ALRMTYPE) == TIME_RELATIVE) + ADD_MACH_TIMESPEC(alarm_time, clock_time); + + result = CMP_MACH_TIMESPEC(alarm_time, clock_time); + + return ((result >= 0)? 
result: 0); +} + +mach_timespec_t +clock_get_system_value(void) +{ + clock_t clock = &clock_list[SYSTEM_CLOCK]; + mach_timespec_t value; + + (void) (*clock->cl_ops->c_gettime)(&value); + + return value; +} + +mach_timespec_t +clock_get_calendar_value(void) +{ + clock_t clock = &clock_list[CALENDAR_CLOCK]; + mach_timespec_t value = MACH_TIMESPEC_ZERO; + + (void) (*clock->cl_ops->c_gettime)(&value); + + return value; +} diff --git a/osfmk/kern/debug.h b/osfmk/kern/debug.h index 128b02ea3..d546fbfe9 100644 --- a/osfmk/kern/debug.h +++ b/osfmk/kern/debug.h @@ -94,7 +94,7 @@ extern void panic_init(void); __BEGIN_DECLS -extern void panic(const char *string, ...); +extern void panic(const char *string, ...) __dead2; __END_DECLS diff --git a/osfmk/kern/etimer.c b/osfmk/kern/etimer.c new file mode 100644 index 000000000..67b99dbaf --- /dev/null +++ b/osfmk/kern/etimer.c @@ -0,0 +1,194 @@ +/* + * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * The contents of this file constitute Original Code as defined in and + * are subject to the Apple Public Source License Version 1.1 (the + * "License"). You may not use this file except in compliance with the + * License. Please obtain a copy of the License at + * http://www.apple.com/publicsource and read it before using this file. + * + * This Original Code and all software distributed under the License are + * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the + * License for the specific language governing rights and limitations + * under the License. 
+ * + * @APPLE_LICENSE_HEADER_END@ + */ +/* + * @OSF_COPYRIGHT@ + */ +/* + * @APPLE_FREE_COPYRIGHT@ + */ +/* + * File: etimer.c + * Purpose: Routines for handling the machine independent + * event timer. + */ + +#include + +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include + +#ifdef __ppc__ +#include +#else +#include +#endif + +#include + + +/* XXX from /rtclock.c */ +uint32_t rtclock_tick_interval; +clock_timer_func_t rtclock_timer_expire; + +#ifdef __ppc__ +# define PER_PROC_INFO struct per_proc_info +# define GET_PER_PROC_INFO() getPerProc() +#else +# define PER_PROC_INFO cpu_data_t +# define GET_PER_PROC_INFO() current_cpu_datap() +#endif + +/* + * Event timer interrupt. + * + * XXX a drawback of this implementation is that events serviced earlier must not set deadlines + * that occur before the entire chain completes. + * + * XXX a better implementation would use a set of generic callouts and iterate over them + */ +void etimer_intr(int inuser, uint64_t iaddr) { + + uint64_t abstime; + rtclock_timer_t *mytimer; + PER_PROC_INFO *pp; + + pp = GET_PER_PROC_INFO(); + + mytimer = &pp->rtclock_timer; /* Point to the event timer */ + + abstime = mach_absolute_time(); /* Get the time now */ + + /* is it time for power management state change? */ + if (pp->pms.pmsPop <= abstime) { + + KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_EXCP_DECI, 3) | DBG_FUNC_START, 0, 0, 0, 0, 0); + pmsStep(1); /* Yes, advance step */ + KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_EXCP_DECI, 3) | DBG_FUNC_END, 0, 0, 0, 0, 0); + + abstime = mach_absolute_time(); /* Get the time again since we ran a bit */ + } + + /* have we passed the rtclock pop time? 
*/ + if (pp->rtclock_intr_deadline <= abstime) { + + KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_EXCP_DECI, 4) | DBG_FUNC_START, (int)rtclock_tick_interval, 0, 0, 0, 0); + + clock_deadline_for_periodic_event(rtclock_tick_interval, + abstime, + &pp->rtclock_intr_deadline); + + KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_EXCP_DECI, 4) | DBG_FUNC_END, 0, 0, 0, 0, 0); +#if STAT_TIME + hertz_tick(NSEC_PER_HZ, inuser, iaddr); /* Accumulate hertz */ +#else + hertz_tick(inuser, iaddr); /* Accumulate hertz */ +#endif + + abstime = mach_absolute_time(); /* Refresh the current time since we went away */ + } + + /* has a pending clock timer expired? */ + if (mytimer->deadline <= abstime) { /* Have we expired the deadline? */ + mytimer->has_expired = TRUE; /* Remember that we popped */ + mytimer->deadline = EndOfAllTime; /* Set timer request to the end of all time in case we have no more events */ + (*rtclock_timer_expire)(abstime); /* Process pop */ + mytimer->has_expired = FALSE; + } + + /* schedule our next deadline */ + pp->rtcPop = EndOfAllTime; /* any real deadline will be earlier */ + etimer_resync_deadlines(); +} + +/* + * Set the clock deadline; called by the thread scheduler. + */ +void etimer_set_deadline(uint64_t deadline) +{ + rtclock_timer_t *mytimer; + spl_t s; + PER_PROC_INFO *pp; + + s = splclock(); /* no interruptions */ + pp = GET_PER_PROC_INFO(); + + mytimer = &pp->rtclock_timer; /* Point to the timer itself */ + mytimer->deadline = deadline; /* Set the new expiration time */ + + etimer_resync_deadlines(); + + splx(s); +} + +/* + * Re-evaluate the outstanding deadlines and select the most proximate. + * + * Should be called at splclock. + */ +void +etimer_resync_deadlines(void) +{ + uint64_t deadline; + rtclock_timer_t *mytimer; + spl_t s = splclock(); /* No interruptions please */ + PER_PROC_INFO *pp; + + pp = GET_PER_PROC_INFO(); + + deadline = 0; + + /* next rtclock interrupt? 
*/ + if (pp->rtclock_intr_deadline > 0) + deadline = pp->rtclock_intr_deadline; + + /* if we have a clock timer set sooner, pop on that */ + mytimer = &pp->rtclock_timer; /* Point to the timer itself */ + if ((!mytimer->has_expired) && (mytimer->deadline > 0) && (mytimer->deadline < deadline)) + deadline = mytimer->deadline; + + /* if we have a power management event coming up, how about that? */ + if ((pp->pms.pmsPop > 0) && (pp->pms.pmsPop < deadline)) + deadline = pp->pms.pmsPop; + +#ifdef __ppc__ +#endif + + if ((deadline > 0) && (deadline < pp->rtcPop)) { + int decr; + + pp->rtcPop = deadline; + decr = setPop(deadline); + + KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_EXCP_DECI, 1) | DBG_FUNC_NONE, decr, 2, 0, 0, 0); + } + splx(s); +} diff --git a/osfmk/kern/etimer.h b/osfmk/kern/etimer.h new file mode 100644 index 000000000..3b5c4bf51 --- /dev/null +++ b/osfmk/kern/etimer.h @@ -0,0 +1,68 @@ +/* + * Copyright (c) 2004-2005 Apple Computer, Inc. All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * The contents of this file constitute Original Code as defined in and + * are subject to the Apple Public Source License Version 1.1 (the + * "License"). You may not use this file except in compliance with the + * License. Please obtain a copy of the License at + * http://www.apple.com/publicsource and read it before using this file. + * + * This Original Code and all software distributed under the License are + * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the + * License for the specific language governing rights and limitations + * under the License. 
+ * + * @APPLE_LICENSE_HEADER_END@ + */ +/* + * @OSF_COPYRIGHT@ + */ +/* + * @APPLE_FREE_COPYRIGHT@ + */ +/* + * File: etimer.h + * Purpose: Routines for handling the machine independent + * real-time clock. + */ + +#ifndef _ETIMER_H_ +#define _ETIMER_H_ + +#define EndOfAllTime 0xFFFFFFFFFFFFFFFFULL + +/* extern void rtclock_intr(int inuser, uint64_t iaddr); - this is currently MD */ +typedef void (*etimer_intr_t)(int, uint64_t); + +extern int setTimerReq(void); +extern void etimer_intr(int inuser, uint64_t iaddr); + +extern void etimer_set_deadline(uint64_t deadline); +extern int setPop(uint64_t time); + +extern void etimer_resync_deadlines(void); + +extern uint32_t rtclock_tick_interval; + +extern uint64_t tsc_to_nanoseconds(uint64_t abstime); + +#if 0 /* this is currently still MD */ +#pragma pack(push,4) +struct rtclock_timer_t { + uint64_t deadline; + uint32_t + /*boolean_t*/ is_set:1, + has_expired:1, + :0; +}; +#pragma pack(pop) +typedef struct rtclock_timer_t rtclock_timer_t; +#endif /* MD */ + + +#endif /* _ETIMER_H_ */ diff --git a/osfmk/kern/hibernate.c b/osfmk/kern/hibernate.c index e43ee941e..4bb00c5dd 100644 --- a/osfmk/kern/hibernate.c +++ b/osfmk/kern/hibernate.c @@ -51,17 +51,13 @@ hibernate_page_list_zero(hibernate_page_list_t *list) bitmap = &list->bank_bitmap[0]; for (bank = 0; bank < list->bank_count; bank++) { - uint32_t bit, last_bit; - uint32_t *bitmap_word; + uint32_t last_bit; bzero((void *) &bitmap->bitmap[0], bitmap->bitmapwords << 2); - - // Set out-of-bound bits at end of bitmap. - bitmap_word = &bitmap->bitmap[bitmap->bitmapwords - 1]; - last_bit = ((bitmap->last_page - bitmap->first_page) & 31); - for (bit = 31; bit > last_bit; bit--) { - *bitmap_word |= (0x80000000 >> bit); - } + // set out-of-bound bits at end of bitmap. 
+ last_bit = ((bitmap->last_page - bitmap->first_page + 1) & 31); + if (last_bit) + bitmap->bitmap[bitmap->bitmapwords - 1] = (0xFFFFFFFF >> last_bit); bitmap = (hibernate_bitmap_t *) &bitmap->bitmap[bitmap->bitmapwords]; } @@ -165,6 +161,7 @@ discard_page(vm_page_t m) pages known to VM to not need saving are subtracted. Wired pages to be saved are present in page_list_wired, pageable in page_list. */ +extern vm_page_t vm_lopage_queue_free; void hibernate_page_list_setall(hibernate_page_list_t * page_list, @@ -206,6 +203,16 @@ hibernate_page_list_setall(hibernate_page_list_t * page_list, m = (vm_page_t) m->pageq.next; } + m = (vm_page_t) vm_lopage_queue_free; + while(m) + { + pages--; + count_wire--; + hibernate_page_bitset(page_list, TRUE, m->phys_page); + hibernate_page_bitset(page_list_wired, TRUE, m->phys_page); + m = (vm_page_t) m->pageq.next; + } + queue_iterate( &vm_page_queue_zf, m, vm_page_t, diff --git a/osfmk/kern/ipc_kobject.h b/osfmk/kern/ipc_kobject.h index 9c690bb4f..9d19732bd 100644 --- a/osfmk/kern/ipc_kobject.h +++ b/osfmk/kern/ipc_kobject.h @@ -95,7 +95,7 @@ typedef natural_t ipc_kobject_type_t; #define IKOT_HOST_SECURITY 17 #define IKOT_LEDGER 18 #define IKOT_MASTER_DEVICE 19 -/* (unused) 20 */ +#define IKOT_TASK_NAME 20 #define IKOT_SUBSYSTEM 21 #define IKOT_IO_DONE_QUEUE 22 #define IKOT_SEMAPHORE 23 @@ -107,11 +107,14 @@ typedef natural_t ipc_kobject_type_t; #define IKOT_IOKIT_CONNECT 29 #define IKOT_IOKIT_OBJECT 30 #define IKOT_UPL 31 - /* << new entries here */ -#define IKOT_UNKNOWN 32 /* magic catchall */ -#define IKOT_MAX_TYPE 33 /* # of IKOT_ types */ +#define IKOT_MEM_OBJ_CONTROL 32 +/* + * Add new entries here and adjust IKOT_UNKNOWN. + * Please keep ipc/ipc_object.c:ikot_print_array up to date. 
+ */ +#define IKOT_UNKNOWN 33 /* magic catchall */ +#define IKOT_MAX_TYPE (IKOT_UNKNOWN+1) /* # of IKOT_ types */ -/* Please keep ipc/ipc_object.c:ikot_print_array up to date */ #define is_ipc_kobject(ikot) ((ikot) != IKOT_NONE) diff --git a/osfmk/kern/ipc_tt.c b/osfmk/kern/ipc_tt.c index 3af0f19c5..a9e1d11f6 100644 --- a/osfmk/kern/ipc_tt.c +++ b/osfmk/kern/ipc_tt.c @@ -107,6 +107,7 @@ ipc_task_init( { ipc_space_t space; ipc_port_t kport; + ipc_port_t nport; kern_return_t kr; int i; @@ -120,8 +121,13 @@ ipc_task_init( if (kport == IP_NULL) panic("ipc_task_init"); + nport = ipc_port_alloc_kernel(); + if (nport == IP_NULL) + panic("ipc_task_init"); + itk_lock_init(task); task->itk_self = kport; + task->itk_nself = nport; task->itk_sself = ipc_port_make_send(kport); task->itk_space = space; space->is_fast = FALSE; @@ -184,11 +190,15 @@ ipc_task_enable( task_t task) { ipc_port_t kport; + ipc_port_t nport; itk_lock(task); kport = task->itk_self; if (kport != IP_NULL) ipc_kobject_set(kport, (ipc_kobject_t) task, IKOT_TASK); + nport = task->itk_nself; + if (nport != IP_NULL) + ipc_kobject_set(nport, (ipc_kobject_t) task, IKOT_TASK_NAME); itk_unlock(task); } @@ -205,11 +215,15 @@ ipc_task_disable( task_t task) { ipc_port_t kport; + ipc_port_t nport; itk_lock(task); kport = task->itk_self; if (kport != IP_NULL) ipc_kobject_set(kport, IKO_NULL, IKOT_NONE); + nport = task->itk_nself; + if (nport != IP_NULL) + ipc_kobject_set(nport, IKO_NULL, IKOT_NONE); itk_unlock(task); } @@ -227,6 +241,7 @@ ipc_task_terminate( task_t task) { ipc_port_t kport; + ipc_port_t nport; int i; itk_lock(task); @@ -237,8 +252,12 @@ ipc_task_terminate( itk_unlock(task); return; } - task->itk_self = IP_NULL; + + nport = task->itk_nself; + assert(nport != IP_NULL); + task->itk_nself = IP_NULL; + itk_unlock(task); /* release the naked send rights */ @@ -265,15 +284,18 @@ ipc_task_terminate( ipc_port_release_send(task->wired_ledger_port); ipc_port_release_send(task->paged_ledger_port); - /* destroy the 
kernel port */ + /* destroy the kernel ports */ ipc_port_dealloc_kernel(kport); + ipc_port_dealloc_kernel(nport); } /* * Routine: ipc_task_reset * Purpose: * Reset a task's IPC state to protect it when - * it enters an elevated security context. + * it enters an elevated security context. The + * task name port can remain the same - since + * it represents no specific privilege. * Conditions: * Nothing locked. The task must be suspended. * (Or the current thread must be in the task.) @@ -695,44 +717,45 @@ task_get_special_port( int which, ipc_port_t *portp) { - ipc_port_t *whichp; ipc_port_t port; if (task == TASK_NULL) return KERN_INVALID_ARGUMENT; + itk_lock(task); + if (task->itk_self == IP_NULL) { + itk_unlock(task); + return KERN_FAILURE; + } + switch (which) { case TASK_KERNEL_PORT: - whichp = &task->itk_sself; + port = ipc_port_copy_send(task->itk_sself); + break; + + case TASK_NAME_PORT: + port = ipc_port_make_send(task->itk_nself); break; case TASK_HOST_PORT: - whichp = &task->itk_host; + port = ipc_port_copy_send(task->itk_host); break; case TASK_BOOTSTRAP_PORT: - whichp = &task->itk_bootstrap; + port = ipc_port_copy_send(task->itk_bootstrap); break; case TASK_WIRED_LEDGER_PORT: - whichp = &task->wired_ledger_port; + port = ipc_port_copy_send(task->wired_ledger_port); break; case TASK_PAGED_LEDGER_PORT: - whichp = &task->paged_ledger_port; + port = ipc_port_copy_send(task->paged_ledger_port); break; default: return KERN_INVALID_ARGUMENT; } - - itk_lock(task); - if (task->itk_self == IP_NULL) { - itk_unlock(task); - return KERN_FAILURE; - } - - port = ipc_port_copy_send(*whichp); itk_unlock(task); *portp = port; @@ -1014,6 +1037,39 @@ convert_port_to_task( return (task); } +/* + * Routine: convert_port_to_task_name + * Purpose: + * Convert from a port to a task name. + * Doesn't consume the port ref; produces a task name ref, + * which may be null. + * Conditions: + * Nothing locked. 
+ */ +task_name_t +convert_port_to_task_name( + ipc_port_t port) +{ + task_name_t task = TASK_NULL; + + if (IP_VALID(port)) { + ip_lock(port); + + if ( ip_active(port) && + (ip_kotype(port) == IKOT_TASK || + ip_kotype(port) == IKOT_TASK_NAME)) { + task = (task_name_t)port->ip_kobject; + assert(task != TASK_NAME_NULL); + + task_reference_internal(task); + } + + ip_unlock(port); + } + + return (task); +} + /* * Routine: convert_port_to_space * Purpose: @@ -1193,6 +1249,33 @@ convert_task_to_port( return port; } +/* + * Routine: convert_task_name_to_port + * Purpose: + * Convert from a task name ref to a port. + * Consumes a task name ref; produces a naked send right + * which may be invalid. + * Conditions: + * Nothing locked. + */ + +ipc_port_t +convert_task_name_to_port( + task_name_t task_name) +{ + ipc_port_t port; + + itk_lock(task_name); + if (task_name->itk_nself != IP_NULL) + port = ipc_port_make_send(task_name->itk_nself); + else + port = IP_NULL; + itk_unlock(task_name); + + task_name_deallocate(task_name); + return port; +} + /* * Routine: convert_thread_to_port * Purpose: diff --git a/osfmk/kern/ipc_tt.h b/osfmk/kern/ipc_tt.h index 63b0d6d60..38978a2fb 100644 --- a/osfmk/kern/ipc_tt.h +++ b/osfmk/kern/ipc_tt.h @@ -107,6 +107,10 @@ extern ipc_port_t retrieve_task_self_fast( extern ipc_port_t retrieve_thread_self_fast( thread_t thread); +/* Convert from a port to a task name */ +extern task_name_t convert_port_to_task_name( + ipc_port_t port); + /* Convert from a port to a task */ extern task_t convert_port_to_task( ipc_port_t port); @@ -139,6 +143,10 @@ extern thread_t port_name_to_thread( extern ipc_port_t convert_task_to_port( task_t task); +/* Convert from a task name to a port */ +extern ipc_port_t convert_task_name_to_port( + task_name_t task_name); + /* Convert from a thread to a port */ extern ipc_port_t convert_thread_to_port( thread_t thread); diff --git a/osfmk/kern/kalloc.c b/osfmk/kern/kalloc.c index 57161e0bb..3d1d0c899 100644 --- 
a/osfmk/kern/kalloc.c +++ b/osfmk/kern/kalloc.c @@ -80,6 +80,7 @@ vm_map_t kalloc_map; vm_size_t kalloc_map_size = 16 * 1024 * 1024; vm_size_t kalloc_max; vm_size_t kalloc_max_prerounded; +vm_size_t kalloc_kernmap_size; /* size of kallocs that can come from kernel map */ unsigned int kalloc_large_inuse; vm_size_t kalloc_large_total; @@ -98,6 +99,8 @@ vm_size_t kalloc_large_max; * It represents the first power of two for which no zone exists. * kalloc_max_prerounded is the smallest allocation size, before * rounding, for which no zone exists. + * Also if the allocation size is more than kalloc_kernmap_size + * then allocate from kernel map rather than kalloc_map. */ int first_k_zone = -1; @@ -189,6 +192,8 @@ kalloc_init( else kalloc_max = PAGE_SIZE; kalloc_max_prerounded = kalloc_max / 2 + 1; + /* size it to be more than 16 times kalloc_max (256k) for allocations from kernel map */ + kalloc_kernmap_size = (kalloc_max * 16) + 1; /* * Allocate a zone for each size we are going to handle. @@ -215,6 +220,7 @@ kalloc_canblock( { register int zindex; register vm_size_t allocsize; + vm_map_t alloc_map = VM_MAP_NULL; /* * If size is too large for a zone, then use kmem_alloc. 
@@ -229,7 +235,13 @@ kalloc_canblock( if (!canblock) { return(0); } - if (kmem_alloc(kalloc_map, (vm_offset_t *)&addr, size) != KERN_SUCCESS) + + if (size >= kalloc_kernmap_size) + alloc_map = kernel_map; + else + alloc_map = kalloc_map; + + if (kmem_alloc(alloc_map, (vm_offset_t *)&addr, size) != KERN_SUCCESS) addr = 0; if (addr) { @@ -281,6 +293,7 @@ krealloc( register int zindex; register vm_size_t allocsize; void *naddr; + vm_map_t alloc_map = VM_MAP_NULL; /* can only be used for increasing allocation size */ @@ -299,11 +312,16 @@ krealloc( /* if old block was kmem_alloc'd, then use kmem_realloc if necessary */ if (old_size >= kalloc_max_prerounded) { + if (old_size >= kalloc_kernmap_size) + alloc_map = kernel_map; + else + alloc_map = kalloc_map; + old_size = round_page(old_size); new_size = round_page(new_size); if (new_size > old_size) { - if (KERN_SUCCESS != kmem_realloc(kalloc_map, + if (KERN_SUCCESS != kmem_realloc(alloc_map, (vm_offset_t)*addrp, old_size, (vm_offset_t *)&naddr, new_size)) { panic("krealloc: kmem_realloc"); @@ -314,7 +332,7 @@ krealloc( *addrp = (void *) naddr; /* kmem_realloc() doesn't free old page range. 
*/ - kmem_free(kalloc_map, (vm_offset_t)*addrp, old_size); + kmem_free(alloc_map, (vm_offset_t)*addrp, old_size); kalloc_large_total += (new_size - old_size); @@ -344,7 +362,11 @@ krealloc( simple_unlock(lock); if (new_size >= kalloc_max_prerounded) { - if (KERN_SUCCESS != kmem_alloc(kalloc_map, + if (new_size >= kalloc_kernmap_size) + alloc_map = kernel_map; + else + alloc_map = kalloc_map; + if (KERN_SUCCESS != kmem_alloc(alloc_map, (vm_offset_t *)&naddr, new_size)) { panic("krealloc: kmem_alloc"); simple_lock(lock); @@ -419,11 +441,16 @@ kfree( { register int zindex; register vm_size_t freesize; + vm_map_t alloc_map = VM_MAP_NULL; /* if size was too large for a zone, then use kmem_free */ if (size >= kalloc_max_prerounded) { - kmem_free(kalloc_map, (vm_offset_t)data, size); + if (size >= kalloc_kernmap_size) + alloc_map = kernel_map; + else + alloc_map = kalloc_map; + kmem_free(alloc_map, (vm_offset_t)data, size); kalloc_large_total -= size; kalloc_large_inuse--; diff --git a/osfmk/kern/locks.c b/osfmk/kern/locks.c index aa7b65fc8..db2db8142 100644 --- a/osfmk/kern/locks.c +++ b/osfmk/kern/locks.c @@ -347,6 +347,17 @@ lck_attr_setdebug( } +/* + * Routine: lck_attr_rw_shared_priority + */ +void +lck_attr_rw_shared_priority( + lck_attr_t *attr) +{ + (void)hw_atomic_or((uint32_t *)&attr->lck_attr_val, LCK_ATTR_RW_SHARED_PRIORITY); +} + + /* * Routine: lck_attr_free */ @@ -1035,7 +1046,7 @@ unsigned int usimple_lock_try_EXT( lck_spin_t *lock) { - lck_spin_try_lock(lock); + return(lck_spin_try_lock(lock)); } void diff --git a/osfmk/kern/locks.h b/osfmk/kern/locks.h index 06496c640..feca9487d 100644 --- a/osfmk/kern/locks.h +++ b/osfmk/kern/locks.h @@ -181,8 +181,10 @@ typedef struct _lck_attr_ { extern lck_attr_t LockDefaultLckAttr; -#define LCK_ATTR_NONE 0 -#define LCK_ATTR_DEBUG 0x1 +#define LCK_ATTR_NONE 0 + +#define LCK_ATTR_DEBUG 0x00000001 +#define LCK_ATTR_RW_SHARED_PRIORITY 0x00010000 #else typedef struct __lck_attr__ lck_attr_t; @@ -201,6 +203,11 @@ extern 
void lck_attr_setdefault( extern void lck_attr_setdebug( lck_attr_t *attr); +#ifdef XNU_KERNEL_PRIVATE +extern void lck_attr_rw_shared_priority( + lck_attr_t *attr); +#endif + extern void lck_attr_free( lck_attr_t *attr); diff --git a/osfmk/kern/machine.c b/osfmk/kern/machine.c index 122d2ae6d..8a331ca87 100644 --- a/osfmk/kern/machine.c +++ b/osfmk/kern/machine.c @@ -83,6 +83,7 @@ #include #include +#include /* * Exported variables: @@ -136,7 +137,12 @@ host_reboot( return (KERN_SUCCESS); } - halt_all_cpus(!(options & HOST_REBOOT_HALT)); + if (options & HOST_REBOOT_UPSDELAY) { + // UPS power cutoff path + PEHaltRestart( kPEUPSDelayHaltCPU ); + } else { + halt_all_cpus(!(options & HOST_REBOOT_HALT)); + } return (KERN_SUCCESS); } diff --git a/osfmk/kern/misc_protos.h b/osfmk/kern/misc_protos.h index 193dce700..110daab9b 100644 --- a/osfmk/kern/misc_protos.h +++ b/osfmk/kern/misc_protos.h @@ -89,6 +89,10 @@ extern int copyoutmsg( user_addr_t user_addr, mach_msg_size_t nbytes); +/* Invalidate copy window(s) cache */ +extern void inval_copy_windows(thread_t); + + extern int sscanf(const char *input, const char *fmt, ...); extern integer_t sprintf(char *buf, const char *fmt, ...); diff --git a/osfmk/kern/page_decrypt.c b/osfmk/kern/page_decrypt.c new file mode 100644 index 000000000..34d66afb9 --- /dev/null +++ b/osfmk/kern/page_decrypt.c @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2005 Apple Computer, Inc. All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * The contents of this file constitute Original Code as defined in and + * are subject to the Apple Public Source License Version 1.1 (the + * "License"). You may not use this file except in compliance with the + * License. Please obtain a copy of the License at + * http://www.apple.com/publicsource and read it before using this file. 
+ * + * This Original Code and all software distributed under the License are + * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the + * License for the specific language governing rights and limitations + * under the License. + * + * @APPLE_LICENSE_HEADER_END@ + */ + + +#include +#include +#include + +/*#include */ +extern int hz; /* system clock's frequency */ +extern void* dsmos_blobs[]; +extern int dsmos_blob_count; + +/* #include */ +extern int tsleep(void *chan, int pri, const char *wmesg, int timo); + +/* #include */ +#define PZERO 22 /* No longer magic, shouldn't be here. XXX */ + +static int _dsmos_wait_for_callback(const void*,void*); + +static dsmos_page_transform_hook_t dsmos_hook = _dsmos_wait_for_callback; + +int +_dsmos_wait_for_callback(const void* from, void *to) +{ +/* printf("%s\n", __FUNCTION__); */ + while (dsmos_hook == NULL) + tsleep(&dsmos_hook, PZERO, "dsmos", hz / 10); + + return (*dsmos_hook) (from, to); +} + +void +dsmos_page_transform_hook(dsmos_page_transform_hook_t hook, + void (*commpage_setup_dsmos_blob)(void**, int)) +{ +#ifdef i386 + /* finish initializing the commpage here */ + (*commpage_setup_dsmos_blob)(dsmos_blobs, dsmos_blob_count); +#endif + + /* set the hook now - new callers will run with it */ + dsmos_hook = hook; +} + +int +dsmos_page_transform(const void* from, void* to) +{ +/* printf("%s\n", __FUNCTION__); */ + if (dsmos_hook == NULL) + return KERN_FAILURE; + return (*dsmos_hook) (from, to); +} + diff --git a/osfmk/kern/page_decrypt.h b/osfmk/kern/page_decrypt.h new file mode 100644 index 000000000..7e0e56135 --- /dev/null +++ b/osfmk/kern/page_decrypt.h @@ -0,0 +1,32 @@ +/* + * Copyright (c) 2005 Apple Computer, Inc. All rights reserved. 
+ * + * @APPLE_LICENSE_HEADER_START@ + * + * The contents of this file constitute Original Code as defined in and + * are subject to the Apple Public Source License Version 1.1 (the + * "License"). You may not use this file except in compliance with the + * License. Please obtain a copy of the License at + * http://www.apple.com/publicsource and read it before using this file. + * + * This Original Code and all software distributed under the License are + * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the + * License for the specific language governing rights and limitations + * under the License. + * + * @APPLE_LICENSE_HEADER_END@ + */ + +#ifndef _KERN_PAGE_DECRYPT_H +#define _KERN_PAGE_DECRYPT_H + +typedef int (*dsmos_page_transform_hook_t) (const void*,void*); +extern void dsmos_page_transform_hook(dsmos_page_transform_hook_t hook, + void (*commpage_setup_dsmos_blob)(void**, int)); /* exported */ + +extern int dsmos_page_transform(const void*,void*); + +#endif /* _KERN_PAGE_DECRYPT_H */ diff --git a/osfmk/ppc/pms.c b/osfmk/kern/pms.c similarity index 82% rename from osfmk/ppc/pms.c rename to osfmk/kern/pms.c index 58c47f9b7..c8dd65216 100644 --- a/osfmk/ppc/pms.c +++ b/osfmk/kern/pms.c @@ -19,19 +19,20 @@ * * @APPLE_LICENSE_HEADER_END@ */ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include +#include +#include +#ifdef __ppc__ +# include +# include +#else +# include +# include +#endif +#include +#include #include - -extern int real_ncpus; +#include +#include static uint32_t pmsSyncrolator = 0; /* Only one control operation at a time please */ uint32_t pmsBroadcastWait = 0; /* Number of outstanding broadcasts */ @@ -62,6 +63,15 @@ pmsSetFunc_t pmsFuncTab[pmsSetFuncMax] = {0}; /* This is 
the function index tab pmsQueryFunc_t pmsQueryFunc = 0; /* Pointer to pmsQuery function */ uint32_t pmsPlatformData = 0; /* Data provided by and passed to platform functions */ +#ifdef __ppc__ +# define PER_PROC_INFO struct per_proc_info +# define GET_PER_PROC_INFO() getPerProc() +#else +# define PER_PROC_INFO cpu_data_t +# define GET_PER_PROC_INFO() current_cpu_datap() +#endif + + /* * Do any initialization needed @@ -74,6 +84,8 @@ void pmsInit(void) { simple_lock_init(&pmsBuildLock, 0); /* Initialize the build lock */ for(i = 0; i < pmsMaxStates; i++) pmsCtls.pmsDefs[i] = &pmsDummy; /* Initialize the table to dummy steps */ + pmsCPUMachineInit(); + return; } @@ -133,10 +145,10 @@ void pmsPark(void) { void pmsDown(void) { - struct per_proc_info *pp; + PER_PROC_INFO *pp; uint32_t nstate; - pp = getPerProc(); /* Get our per_proc */ + pp = GET_PER_PROC_INFO(); /* Get our per_proc */ if(!pmsInstalled || pp->pms.pmsState == pmsParked) return; /* No stepping if parked or not installed */ @@ -152,26 +164,83 @@ void pmsDown(void) { * * Interrupts must be off... */ + +int pmsStepIdleSneaks; +int pmsStepIdleTries; void pmsStep(int timer) { - struct per_proc_info *pp; - uint32_t nstate; - int dir; - - pp = getPerProc(); /* Get our per_proc */ - - if(!pmsInstalled || pp->pms.pmsState == pmsParked) return; /* No stepping if parked or not installed */ - - nstate = pmsCtls.pmsDefs[pp->pms.pmsState]->pmsNext; /* Assume a normal step */ - dir = 1; /* A normal step is a step up */ - - if(timer && (pmsCtls.pmsDefs[pp->pms.pmsState]->pmsSetCmd == pmsDelay)) { /* If the timer expired and we are in a delay step, use the delay branch */ - nstate = pmsCtls.pmsDefs[pp->pms.pmsState]->pmsTDelay; /* Get the delayed step */ - dir = 0; /* Delayed steps are a step down for accounting purposes. 
*/ + PER_PROC_INFO *pp; + uint32_t nstate; + uint32_t tstate; + uint32_t pkgstate; + int dir; + int i; + + pp = GET_PER_PROC_INFO(); /* Get our per_proc */ + + if(!pmsInstalled || pp->pms.pmsState == pmsParked) + return; /* No stepping if parked or not installed */ + + /* + * Assume a normal step. + */ + nstate = pmsCtls.pmsDefs[pp->pms.pmsState]->pmsNext; + + /* + * If we are idling and being asked to step up, check to see whether + * the package we're in is already at a non-idle power state. If so, + * attempt to work out what state that is, and go there directly to + * avoid wasting time ramping up. + */ + if ((pp->pms.pmsState == pmsIdle) + && ((pkgstate = pmsCPUPackageQuery()) != ~(uint32_t)0)) { + /* + * Search forward through the stepper program, + * avoid looping for too long. + */ + tstate = nstate; + pmsStepIdleTries++; + for (i = 0; i < 32; i++) { + /* + * Compare command with current package state + */ + if ((pmsCtls.pmsDefs[tstate]->pmsSetCmd & pmsCPU) == pkgstate) { + nstate = tstate; + pmsStepIdleSneaks++; + break; + } + + /* + * Advance to the next step in the program. + */ + if (pmsCtls.pmsDefs[tstate]->pmsNext == tstate) + break; /* infinite loop */ + tstate = pmsCtls.pmsDefs[tstate]->pmsNext; + } + } + + /* + * Default to a step up. + */ + dir = 1; + + /* + * If we are stepping as a consequence of timer expiry, select the + * alternate exit path and note this as downward step for accounting + * purposes. + */ + if (timer + && (pmsCtls.pmsDefs[pp->pms.pmsState]->pmsSetCmd == pmsDelay)) { + nstate = pmsCtls.pmsDefs[pp->pms.pmsState]->pmsTDelay; + + /* + * Delayed steps are a step down for accounting purposes. 
+ */ + dir = 0; } - pmsSetStep(nstate, dir); /* Step to it */ + pmsSetStep(nstate, dir); return; } @@ -186,13 +255,13 @@ void pmsStep(int timer) { void pmsSetStep(uint32_t nstep, int dir) { - struct per_proc_info *pp; - uint32_t pstate, ret, nCSetCmd, mCSetCmd; + PER_PROC_INFO *pp; + uint32_t pstate, nCSetCmd, mCSetCmd; pmsDef *pnstate, *pcstate; - uint64_t tb, nt, dur; - int cpu, frompark; + uint64_t tb, dur; + int cpu; - pp = getPerProc(); /* Get our per_proc */ + pp = GET_PER_PROC_INFO(); /* Get our per_proc */ cpu = cpu_number(); /* Get our processor */ while(1) { /* Keep stepping until we get a delay */ @@ -207,7 +276,7 @@ void pmsSetStep(uint32_t nstep, int dir) { pp->pms.pmsStamp = tb; /* Show transition now */ pp->pms.pmsPop = HalfwayToForever; /* Set the pop way into the future */ pp->pms.pmsState = pmsParked; /* Make sure we are parked */ - setTimerReq(); /* Cancel our timer if going */ + etimer_resync_deadlines(); /* Cancel our timer if going */ return; } @@ -252,7 +321,7 @@ void pmsSetStep(uint32_t nstep, int dir) { if((pnstate->pmsSetCmd == pmsDelay) || (!(pp->pms.pmsCSetCmd & pmsSync) && (pnstate->pmsLimit != 0))) { /* Is this not syncronous and a non-zero delay or a delayed step? */ - setTimerReq(); /* Start the timers ticking */ + etimer_resync_deadlines(); /* Start the timers ticking */ break; /* We've stepped as far as we're going to... */ } @@ -270,18 +339,16 @@ void pmsSetStep(uint32_t nstep, int dir) { void pmsRunLocal(uint32_t nstep) { - struct per_proc_info *pp; - uint32_t cstate, ret, lastState; - pmsDef *pnstate, *pcstate; - uint64_t tb, nt, dur; - int cpu, i, j; + PER_PROC_INFO *pp; + uint32_t lastState; + int cpu, i; boolean_t intr; if(!pmsInstalled) return; /* Ignore this if no step programs installed... */ intr = ml_set_interrupts_enabled(FALSE); /* No interruptions in here */ - pp = getPerProc(); /* Get our per_proc */ + pp = GET_PER_PROC_INFO(); /* Get our per_proc */ if(nstep == pmsStartUp) { /* Should we start up? 
*/ pmsCPUInit(); /* Get us up to full with high voltage and park */ @@ -310,67 +377,70 @@ void pmsRunLocal(uint32_t nstep) { /* * Control the Power Management Stepper. - * Called from user state by the superuser via a ppc system call. + * Called from user state by the superuser. * Interruptions disabled. * */ +kern_return_t pmsControl(uint32_t request, user_addr_t reqaddr, uint32_t reqsize) { -int pmsCntrl(struct savearea *save) { - - uint32_t request, nstep, reqsize, result, presult; + uint32_t nstep, result, presult; int ret, cpu; kern_return_t kret; pmsDef *ndefs; - struct per_proc_info *pp; + PER_PROC_INFO *pp; - pp = getPerProc(); /* Get our per_proc */ + pp = GET_PER_PROC_INFO(); /* Get our per_proc */ cpu = cpu_number(); /* Get our processor */ if(!is_suser()) { /* We are better than most, */ - save->save_r3 = KERN_FAILURE; /* so we will only talk to the superuser. */ - return 1; /* Turn up our noses, say "harrumph," and walk away... */ + return KERN_FAILURE; /* so we will only talk to the superuser. */ } - - if(save->save_r3 >= pmsCFree) { /* Can we understand the request? */ - save->save_r3 = KERN_INVALID_ARGUMENT; /* What language are these guys talking in, anyway? */ - return 1; /* Cock head like a confused puppy and run away... */ + + if(request >= pmsCFree) { /* Can we understand the request? */ + return KERN_INVALID_ARGUMENT; /* What language are these guys talking in, anyway? */ } - request = (int)save->save_r3; /* Remember the request */ - reqsize = (uint32_t)save->save_r5; /* Get the size of the config table */ - if(request == pmsCQuery) { /* Are we just checking? 
*/ - result = pmsCPUquery() & pmsCPU; /* Get the processor data and make sure there is no slop */ + result = pmsCPUQuery() & pmsCPU; /* Get the processor data and make sure there is no slop */ presult = 0; /* Assume nothing */ if((uint32_t)pmsQueryFunc) presult = pmsQueryFunc(cpu, pmsPlatformData); /* Go get the platform state */ result = result | (presult & (pmsXClk | pmsVoltage | pmsPowerID)); /* Merge the platform state with no slop */ - save->save_r3 = result; /* Tell 'em... */ - return 1; + return result; /* Tell 'em... */ } if(request == pmsCExperimental) { /* Enter experimental mode? */ if(pmsInstalled || (pmsExperimental & 1)) { /* Are we already running or in experimental? */ - save->save_r3 = KERN_FAILURE; /* Fail, since we are already running */ - return 1; + return KERN_FAILURE; /* Fail, since we are already running */ } pmsExperimental |= 1; /* Flip us into experimental but don't change other flags */ pmsCPUConf(); /* Configure for this machine */ pmsStart(); /* Start stepping */ - save->save_r3 = KERN_SUCCESS; /* We are victorious... */ - return 1; + return KERN_SUCCESS; /* We are victorious... */ } if(request == pmsCCnfg) { /* Do some up-front checking before we commit to doing this */ if((reqsize > (pmsMaxStates * sizeof(pmsDef))) || (reqsize < (pmsFree * sizeof(pmsDef)))) { /* Check that the size is reasonable */ - save->save_r3 = KERN_NO_SPACE; /* Tell them that they messed up */ - return 1; /* l8r... */ + return KERN_NO_SPACE; /* Tell them that they messed up */ } } + if (request == pmsGCtls) { + if (reqsize != sizeof(pmsCtls)) + return(KERN_FAILURE); + ret = copyout(&pmsCtls, reqaddr, reqsize); + return kret; + } + + if (request == pmsGStats) { + if (reqsize != sizeof(pmsStatsd)) /* request size is fixed */ + return KERN_FAILURE; + ret = copyout(&pmsStatsd, reqaddr, reqsize); + return kret; /* All done... */ + } /* * We are committed after here. 
If there are any errors detected, we shouldn't die, but we @@ -381,10 +451,8 @@ int pmsCntrl(struct savearea *save) { */ if(!hw_compare_and_store(0, 1, &pmsSyncrolator)) { /* Are we already doing this? */ - save->save_r3 = KERN_RESOURCE_SHORTAGE; /* Tell them that we are already busy and to try again */ - return 1; /* G'wan away and don't bother me... */ + return KERN_RESOURCE_SHORTAGE; /* Tell them that we are already busy and to try again */ } - save->save_r3 = KERN_SUCCESS; /* Assume success */ // NOTE: We will block in the following code until everyone has finished the prepare @@ -392,7 +460,7 @@ int pmsCntrl(struct savearea *save) { if(request == pmsCPark) { /* Is all we're supposed to do park? */ pmsSyncrolator = 0; /* Free us up */ - return 1; /* Well, then we're done... */ + return KERN_SUCCESS; /* Well, then we're done... */ } switch(request) { /* Select the routine */ @@ -412,25 +480,22 @@ int pmsCntrl(struct savearea *save) { case pmsCCnfg: /* Loads new stepper program */ if(!(ndefs = (pmsDef *)kalloc(reqsize))) { /* Get memory for the whole thing */ - save->save_r3 = KERN_INVALID_ADDRESS; /* Return invalid address */ pmsSyncrolator = 0; /* Free us up */ - return 1; /* All done... */ + return KERN_INVALID_ADDRESS; /* All done... */ } - ret = copyin((user_addr_t)((unsigned int)(save->save_r4)), (void *)ndefs, reqsize); /* Get the new config table */ + ret = copyin(reqaddr, (void *)ndefs, reqsize); /* Get the new config table */ if(ret) { /* Hmmm, something went wrong with the copyin */ - save->save_r3 = KERN_INVALID_ADDRESS; /* Return invalid address */ kfree((vm_offset_t)ndefs, reqsize); /* Free up the copied in data */ pmsSyncrolator = 0; /* Free us up */ - return 1; /* All done... */ + return KERN_INVALID_ADDRESS; /* All done... */ } kret = pmsBuild(ndefs, reqsize, 0, 0, 0); /* Go build and replace the tables. 
Make sure we keep the old platform stuff */ if(kret) { /* Hmmm, something went wrong with the compilation */ - save->save_r3 = kret; /* Pass back the passed back return code */ kfree((vm_offset_t)ndefs, reqsize); /* Free up the copied in data */ pmsSyncrolator = 0; /* Free us up */ - return 1; /* All done... */ + return kret; /* All done... */ } nstep = pmsNormHigh; /* Set the request */ @@ -443,46 +508,21 @@ int pmsCntrl(struct savearea *save) { pmsRun(nstep); /* Get everyone into step */ pmsSyncrolator = 0; /* Free us up */ - return 1; /* All done... */ + return KERN_SUCCESS; /* All done... */ } /* * Broadcast a change to all processors including ourselves. - * This must transition before broadcasting because we may block and end up on a different processor. - * - * This will block until all processors have transitioned, so - * obviously, this can block. - * - * Called with interruptions disabled. * + * Interruptions are disabled. */ void pmsRun(uint32_t nstep) { - pmsRunLocal(nstep); /* If we aren't parking (we are already parked), transition ourselves */ - (void)cpu_broadcast(&pmsBroadcastWait, pmsRemote, nstep); /* Tell everyone else to do it too */ - - return; - + pmsCPURun(nstep); } -/* - * Receive a broadcast and react. - * This is called from the interprocessor signal handler. - * We wake up the initiator after we are finished. - * - */ - -void pmsRemote(uint32_t nstep) { - - pmsRunLocal(nstep); /* Go set the step */ - if(!hw_atomic_sub(&pmsBroadcastWait, 1)) { /* Drop the wait count */ - thread_wakeup((event_t)&pmsBroadcastWait); /* If we were the last, wake up the signaller */ - } - return; -} - /* * Build the tables needed for the stepper. This includes both the step definitions and the step control table. 
@@ -525,7 +565,7 @@ kern_return_t pmsBuild(pmsDef *pd, uint32_t pdsize, pmsSetFunc_t *functab, uint3 steps = pdsize / sizeof(pmsDef); /* Get the number of steps supplied */ if((steps >= pmsMaxStates) || (steps < pmsFree)) /* Complain if too big or too small */ - return KERN_INVALID_ARGUMENT; /* Squeak loudly!!! */ + return KERN_INVALID_ARGUMENT; /* Squeak loudly!!! */ if((uint32_t)functab && (uint32_t)functab[0]) /* Verify that if they supplied a new function table, entry 0 is 0 */ return KERN_INVALID_ARGUMENT; /* Fail because they didn't reserve entry 0 */ @@ -666,15 +706,15 @@ kern_return_t pmsBuild(pmsDef *pd, uint32_t pdsize, pmsSetFunc_t *functab, uint3 newpd[i].pmsTDelay = pd[i].pmsTDelay; /* Set the delayed setp */ pmsCtls.pmsDefs[i] = &newpd[i]; /* Copy it in */ } - +#ifdef __ppc__ pmsCtlp = (uint32_t)&pmsCtls; /* Point to the new pms table */ - +#endif pmsInstalled = 1; /* The stepper has been born or born again... */ simple_unlock(&pmsBuildLock); /* Free play! */ (void)ml_set_interrupts_enabled(intr); /* Interrupts back the way there were */ - if((uint32_t)oldAlt) kfree((vm_offset_t)oldAlt, oldAltSize); /* If we already had an alternate, free it */ + if((uint32_t)oldAlt) kfree(oldAlt, oldAltSize); /* If we already had an alternate, free it */ if(xdsply) kprintf("Stepper table installed\n"); diff --git a/osfmk/kern/pms.h b/osfmk/kern/pms.h new file mode 100644 index 000000000..72f601057 --- /dev/null +++ b/osfmk/kern/pms.h @@ -0,0 +1,191 @@ +/* + * Copyright (c) 2004-2005 Apple Computer, Inc. All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * The contents of this file constitute Original Code as defined in and + * are subject to the Apple Public Source License Version 1.1 (the + * "License"). You may not use this file except in compliance with the + * License. Please obtain a copy of the License at + * http://www.apple.com/publicsource and read it before using this file. 
+ * + * This Original Code and all software distributed under the License are + * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the + * License for the specific language governing rights and limitations + * under the License. + * + * @APPLE_LICENSE_HEADER_END@ + */ + +#ifdef KERNEL_PRIVATE + +#ifndef _KERN_PMS_H_ +#define _KERN_PMS_H_ + +#define pmsMaxStates 64 +#define HalfwayToForever 0x7FFFFFFFFFFFFFFFULL +#define century 790560000000000ULL + +typedef void (*pmsSetFunc_t)(uint32_t, uint32_t, uint32_t); /* Function used to set hardware power state */ +typedef uint32_t (*pmsQueryFunc_t)(uint32_t, uint32_t); /* Function used to query hardware power state */ + +typedef struct pmsStat { + uint64_t stTime[2]; /* Total time until switch to next step */ + uint32_t stCnt[2]; /* Number of times switched to next step */ +} pmsStat; + +typedef struct pmsDef { + uint64_t pmsLimit; /* Max time in this state in microseconds */ + uint32_t pmsStepID; /* Unique ID for this step */ + uint32_t pmsSetCmd; /* Command to select power state */ +#define pmsCngXClk 0x80000000 /* Change external clock */ +#define pmsXUnk 0x7F /* External clock unknown */ +#define pmsXClk 0x7F000000 /* External clock frequency */ +#define pmsCngCPU 0x00800000 /* Change CPU parameters */ +#define pmsSync 0x00400000 /* Make changes synchronously, i.e., spin until delay finished */ +#define pmsMustCmp 0x00200000 /* Delay must complete before next change */ +#define pmsCPU 0x001F0000 /* CPU frequency */ +#define pmsCPUUnk 0x1F /* CPU frequency unknown */ +#define pmsCngVolt 0x00008000 /* Change voltage */ +#define pmsVoltage 0x00007F00 /* Voltage */ +#define pmsVoltUnk 0x7F /* Voltage unknown */ +#define pmsPowerID 0x000000FF /* Identify power state to HW */ + +/* Special commands 
- various things */ +#define pmsDelay 0xFFFFFFFD /* Delayed step, no processor or platform changes. Timer expiration causes transition to pmsTDelay */ +#define pmsParkIt 0xFFFFFFFF /* Enters the parked state. No processor or platform changes. Timers cancelled */ +#define pmsCInit ((pmsXUnk << 24) | (pmsCPUUnk << 16) | (pmsVoltUnk << 8)) /* Initial current set command value */ +/* Note: pmsSetFuncInd is an index into a table of function pointers and pmsSetFunc is the address + * of a function. Initially, when you create a step table, this field is set as an index into + * a table of function addresses that gets passed as a parameter to pmsBuild. When pmsBuild + * internalizes the step and function tables, it converts the index to the function address. + */ + union sf { + pmsSetFunc_t pmsSetFunc; /* Function used to set platform power state */ + uint32_t pmsSetFuncInd; /* Index to function in function table */ + } sf; + + uint32_t pmsDown; /* Next state if going lower */ + uint32_t pmsNext; /* Normal next state */ + uint32_t pmsTDelay; /* State if command was pmsDelay and timer expired */ +} pmsDef; + +typedef struct pmsCtl { + pmsStat (*pmsStats)[pmsMaxStates]; /* Pointer to statistics information, 0 if not enabled */ + pmsDef *pmsDefs[pmsMaxStates]; /* Indexed pointers to steps */ +} pmsCtl; + +/* + * Note that this block is in the middle of the per_proc and the size (32 bytes) + * can't be changed without moving it. 
+ */ + +typedef struct pmsd { + uint32_t pmsState; /* Current power management state */ + uint32_t pmsCSetCmd; /* Current select command */ + uint64_t pmsPop; /* Time of next step */ + uint64_t pmsStamp; /* Time of transition to current state */ + uint64_t pmsTime; /* Total time in this state */ +} pmsd; + +/* + * Required power management step programs + */ + +enum { + pmsIdle = 0, /* Power state in idle loop */ + pmsNorm = 1, /* Normal step - usually low power */ + pmsNormHigh = 2, /* Highest power in normal step */ + pmsBoost = 3, /* Boost/overdrive step */ + pmsLow = 4, /* Lowest non-idle power state, no transitions */ + pmsHigh = 5, /* Power step for full on, no transitions */ + pmsPrepCng = 6, /* Prepare for step table change */ + pmsPrepSleep = 7, /* Prepare for sleep */ + pmsOverTemp = 8, /* Machine is too hot */ + pmsEnterNorm = 9, /* Enter into the normal step program */ + pmsFree = 10, /* First available empty step */ + pmsStartUp = 0xFFFFFFFE, /* Start stepping */ + pmsParked = 0xFFFFFFFF /* Power parked - used when changing stepping table */ +}; + +/* + * Power Management Stepper Control requests + */ + +enum { + pmsCPark = 0, /* Parks the stepper */ + pmsCStart = 1, /* Starts normal steppping */ + pmsCFLow = 2, /* Forces low power */ + pmsCFHigh = 3, /* Forces high power */ + pmsCCnfg = 4, /* Loads new stepper program */ + pmsCQuery = 5, /* Query current step and state */ + pmsCExperimental = 6, /* Enter experimental mode */ + pmsGCtls = 7, + pmsGStats = 8, + pmsCVID = 9, + pmsCFree = 10 /* Next control command to be assigned */ +}; + +/* + * User request control structure passed to sysctl + */ +typedef struct { + uint32_t request; /* stepper control request */ + uint32_t reqsize; /* size of data */ + void *reqaddr; /* read/write data buffer */ +} pmsctl_t; + +extern pmsCtl pmsCtls; /* Power Management Stepper control */ +#ifdef __ppc__ +extern uint32_t pmsCtlp; +#endif +extern uint32_t pmsBroadcastWait; /* Number of outstanding broadcasts */ +extern 
int pmsInstalled; +extern int pmsExperimental; + +#define pmsSetFuncMax 32 +extern pmsSetFunc_t pmsFuncTab[pmsSetFuncMax]; +extern pmsQueryFunc_t pmsQueryFunc; +extern uint32_t pmsPlatformData; + +#ifdef __ppc__ +extern int pmsCntrl(struct savearea *save); +#endif +extern kern_return_t pmsControl(uint32_t request, user_addr_t reqaddr, uint32_t reqsize); +extern void pmsInit(void); +extern void pmsStep(int timer); +extern void pmsDown(void); +extern void pmsSetStep(uint32_t nstep, int dir); +extern void pmsRunLocal(uint32_t nstep); +extern void pmsCPUSet(uint32_t sel); +extern uint32_t pmsCPUQuery(void); +extern uint32_t pmsCPUPackageQuery(void); +extern void pmsCPUConf(void); +extern void pmsCPUMachineInit(void); +extern void pmsCPUInit(void); +extern void pmsCPURun(uint32_t nstep); + +extern void pmsCPUYellowFlag(void); +extern void pmsCPUGreenFlag(void); + +#ifdef __cplusplus +extern "C" { +#endif + +extern kern_return_t pmsBuild(pmsDef *pd, uint32_t pdsize, pmsSetFunc_t *functab, uint32_t platformData, pmsQueryFunc_t queryFunc); +extern void pmsRun(uint32_t nstep); +extern void pmsPark(void); +extern void pmsStart(void); +# ifndef __ppc__ +extern kern_return_t pmsCPULoadVIDTable(uint16_t *tablep, int nstates); /* i386 only */ +extern kern_return_t pmsCPUSetPStateLimit(uint32_t limit); +# endif +#ifdef __cplusplus +} +#endif + +#endif /* _KERN_PMS_H_ */ +#endif /* KERNEL_PRIVATE */ diff --git a/osfmk/kern/printf.c b/osfmk/kern/printf.c index e154b7501..90bda90e0 100644 --- a/osfmk/kern/printf.c +++ b/osfmk/kern/printf.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2006 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -766,6 +766,8 @@ printf(const char *fmt, ...) 
enable_preemption(); } +extern unsigned int disableSerialOuput; + void consdebug_putc( char c) @@ -777,10 +779,9 @@ consdebug_putc( debug_putc(c); -#ifdef __ppc__ if (!console_is_serial()) - PE_kputc(c); -#endif + if (!disableSerialOuput) + PE_kputc(c); } void diff --git a/osfmk/kern/sched_prim.c b/osfmk/kern/sched_prim.c index aaece698a..56bb97555 100644 --- a/osfmk/kern/sched_prim.c +++ b/osfmk/kern/sched_prim.c @@ -70,6 +70,7 @@ #include #include +#include #include #include @@ -96,9 +97,7 @@ #include -#ifdef __ppc__ -#include -#endif +#include #define DEFAULT_PREEMPTION_RATE 100 /* (1/s) */ int default_preemption_rate = DEFAULT_PREEMPTION_RATE; @@ -1569,10 +1568,8 @@ thread_dispatch( * If blocked at a continuation, discard * the stack. */ -#ifndef i386 if (thread->continuation != NULL && thread->kernel_stack) stack_free(thread); -#endif if (!(thread->state & TH_IDLE)) { wake_lock(thread); @@ -1653,6 +1650,16 @@ thread_block_reason( s = splsched(); +#if 0 +#if MACH_KDB + { + extern void db_chkpmgr(void); + db_chkpmgr(); /* (BRINGUP) See if pm config changed */ + + } +#endif +#endif + if (!(reason & AST_PREEMPT)) funnel_release_check(self, 2); @@ -2488,8 +2495,10 @@ delay_idle( timer_event((uint32_t)abstime, &processor->idle_thread->system_timer); } - else + else { + cpu_pause(); abstime = mach_absolute_time(); + } } timer_event((uint32_t)abstime, &self->system_timer); @@ -2530,9 +2539,7 @@ idle_thread(void) (void)splsched(); /* Turn interruptions off */ -#ifdef __ppc__ pmsDown(); /* Step power down. 
Note: interruptions must be disabled for this call */ -#endif while ( (*threadp == THREAD_NULL) && (*gcount == 0) && (*lcount == 0) ) { @@ -2556,9 +2563,7 @@ idle_thread(void) pset = processor->processor_set; simple_lock(&pset->sched_lock); -#ifdef __ppc__ pmsStep(0); /* Step up out of idle power, may start timer for next step */ -#endif state = processor->state; if (state == PROCESSOR_DISPATCHING) { diff --git a/osfmk/kern/stack.c b/osfmk/kern/stack.c index 530d7bff5..ee1200b10 100644 --- a/osfmk/kern/stack.c +++ b/osfmk/kern/stack.c @@ -93,7 +93,7 @@ stack_init(void) panic("stack_init: kmem_suballoc"); map_addr = vm_map_min(stack_map); - if (vm_map_enter(stack_map, &map_addr, vm_map_round_page(PAGE_SIZE), 0, VM_FLAGS_FIXED, + if (vm_map_enter(stack_map, &map_addr, vm_map_round_page(PAGE_SIZE), 0, (VM_MAKE_TAG(VM_MEMORY_STACK) | VM_FLAGS_FIXED), VM_OBJECT_NULL, 0, FALSE, VM_PROT_NONE, VM_PROT_NONE, VM_INHERIT_DEFAULT) != KERN_SUCCESS) panic("stack_init: vm_map_enter"); } diff --git a/osfmk/kern/startup.c b/osfmk/kern/startup.c index 65f9d00f8..dc9a6a92a 100644 --- a/osfmk/kern/startup.c +++ b/osfmk/kern/startup.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -81,6 +81,7 @@ #include #include #include +#include #include #include #include @@ -95,7 +96,6 @@ #ifdef __ppc__ #include #include -#include #endif static void kernel_bootstrap_thread(void); @@ -103,9 +103,15 @@ static void kernel_bootstrap_thread(void); static void load_context( thread_t thread); +#ifdef i386 +extern void cpu_window_init(int); +#endif + + /* * Running in virtual memory, on the interrupt stack. */ + void kernel_bootstrap(void) { @@ -147,8 +153,8 @@ kernel_bootstrap(void) * Create a kernel thread to execute the kernel bootstrap. 
*/ result = kernel_thread_create((thread_continue_t)kernel_bootstrap_thread, NULL, MAXPRI_KERNEL, &thread); - if (result != KERN_SUCCESS) - panic("kernel_bootstrap"); + + if (result != KERN_SUCCESS) panic("kernel_bootstrap: result = %08X\n", result); thread->state = TH_RUN; thread_deallocate(thread); @@ -211,10 +217,18 @@ kernel_bootstrap_thread(void) (void) spllo(); /* Allow interruptions */ - /* - * Fill in the comm area (mapped into every task address space.) - */ - commpage_populate(); + /* + * Fill in the comm area (mapped into every task address space.) + */ + commpage_populate(); + +#ifdef i386 + /* + * create and initialize a copy window + * for processor 0 + */ + cpu_window_init(0); +#endif /* * Start the user bootstrap. @@ -225,9 +239,7 @@ kernel_bootstrap_thread(void) } #endif -#if __ppc__ serial_keyboard_init(); /* Start serial keyboard if wanted */ -#endif thread_bind(self, PROCESSOR_NULL); diff --git a/osfmk/kern/syscall_sw.c b/osfmk/kern/syscall_sw.c index bf85bb308..61fc5685d 100644 --- a/osfmk/kern/syscall_sw.c +++ b/osfmk/kern/syscall_sw.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved. 
* * @APPLE_LICENSE_HEADER_START@ * @@ -130,7 +130,7 @@ mach_trap_t mach_trap_table[MACH_TRAP_TABLE_COUNT] = { /* 41 */ MACH_TRAP(init_process, 0, NULL, NULL), /* 42 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), /* 43 */ MACH_TRAP(map_fd, 5, munge_wwwww, munge_ddddd), -/* 44 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), +/* 44 */ MACH_TRAP(task_name_for_pid, 3, munge_www, munge_ddd), /* 45 */ MACH_TRAP(task_for_pid, 3, munge_www, munge_ddd), /* 46 */ MACH_TRAP(pid_for_task, 2, munge_ww,munge_dd), /* 47 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), @@ -182,7 +182,7 @@ mach_trap_t mach_trap_table[MACH_TRAP_TABLE_COUNT] = { /* 92 */ MACH_TRAP(mk_timer_destroy_trap, 1, munge_w, munge_d), /* 93 */ MACH_TRAP(mk_timer_arm_trap, 3, munge_wl, munge_dd), /* 94 */ MACH_TRAP(mk_timer_cancel_trap, 2, munge_ww, munge_dd), -/* 95 */ MACH_TRAP(mk_timebase_info_trap, 5, munge_wwwww, munge_ddddd), +/* 95 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), /* traps 64 - 95 reserved (debo) */ /* 96 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), /* 97 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), diff --git a/osfmk/kern/task.c b/osfmk/kern/task.c index 4cbdb120f..5c288e37b 100644 --- a/osfmk/kern/task.c +++ b/osfmk/kern/task.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2006 Apple Computer, Inc. All rights reserved. 
* * @APPLE_LICENSE_HEADER_START@ * @@ -114,7 +114,7 @@ #include #include /* for kernel_map, ipc_kernel_map */ #include -#include /* for vm_map_remove_commpage64 */ +#include /* for vm_map_remove_commpage */ #if MACH_KDB #include @@ -188,29 +188,44 @@ task_set_64bit( task_t task, boolean_t is64bit) { - if(is64bit) { + thread_t thread; + + if (is64bit) { + if (task_has_64BitAddr(task)) + return; + /* LP64todo - no task working set for 64-bit */ task_set_64BitAddr(task); task_working_set_disable(task); - task->map->max_offset = MACH_VM_MAX_ADDRESS; } else { + if ( !task_has_64BitAddr(task)) + return; + /* * Deallocate all memory previously allocated * above the 32-bit address space, since it won't * be accessible anymore. */ /* LP64todo - make this clean */ -#ifdef __ppc__ - vm_map_remove_commpage64(task->map); + vm_map_remove_commpage(task->map); pmap_unmap_sharedpage(task->map->pmap); /* Unmap commpage */ -#endif (void) vm_map_remove(task->map, (vm_map_offset_t) VM_MAX_ADDRESS, MACH_VM_MAX_ADDRESS, VM_MAP_NO_FLAGS); task_clear_64BitAddr(task); - task->map->max_offset = (vm_map_offset_t)VM_MAX_ADDRESS; } + /* FIXME: On x86, the thread save state flavor can diverge from the + * task's 64-bit feature flag due to the 32-bit/64-bit register save + * state dichotomy. Since we can be pre-empted in this interval, + * certain routines may observe the thread as being in an inconsistent + * state with respect to its task's 64-bitness. + */ +#ifdef __i386__ + queue_iterate(&task->threads, thread, thread_t, task_threads) { + machine_thread_switch_addrmode(thread, !is64bit); + } +#endif } void @@ -225,7 +240,7 @@ task_init(void) /* * Create the kernel task as the first task. 
*/ - if (task_create_internal(TASK_NULL, FALSE, &kernel_task) != KERN_SUCCESS) + if (task_create_internal(TASK_NULL, FALSE, FALSE, &kernel_task) != KERN_SUCCESS) panic("task_init\n"); vm_map_deallocate(kernel_task->map); @@ -307,7 +322,7 @@ task_create( return(KERN_INVALID_ARGUMENT); return task_create_internal( - parent_task, inherit_memory, child_task); + parent_task, inherit_memory, task_has_64BitAddr(parent_task), child_task); } kern_return_t @@ -331,7 +346,7 @@ host_security_create_task_token( return(KERN_INVALID_SECURITY); result = task_create_internal( - parent_task, inherit_memory, child_task); + parent_task, inherit_memory, task_has_64BitAddr(parent_task), child_task); if (result != KERN_SUCCESS) return(result); @@ -352,6 +367,7 @@ kern_return_t task_create_internal( task_t parent_task, boolean_t inherit_memory, + boolean_t is_64bit, task_t *child_task) /* OUT */ { task_t new_task; @@ -368,7 +384,7 @@ task_create_internal( if (inherit_memory) new_task->map = vm_map_fork(parent_task->map); else - new_task->map = vm_map_create(pmap_create(0), + new_task->map = vm_map_create(pmap_create(0, is_64bit), (vm_map_offset_t)(VM_MIN_ADDRESS), (vm_map_offset_t)(VM_MAX_ADDRESS), TRUE); @@ -401,6 +417,10 @@ task_create_internal( new_task->bsd_info = 0; #endif /* MACH_BSD */ +#ifdef __i386__ + new_task->i386_ldt = 0; +#endif + #ifdef __ppc__ if(BootProcInfo.pf.Available & pf64Bit) new_task->taskFeatures[0] |= tf64BitData; /* If 64-bit machine, show we have 64-bit registers at least */ #endif @@ -446,6 +466,11 @@ task_create_internal( convert_port_to_ledger(parent_task->paged_ledger_port)); if(task_has_64BitAddr(parent_task)) task_set_64BitAddr(new_task); + +#ifdef __i386__ + if (inherit_memory && parent_task->i386_ldt) + new_task->i386_ldt = user_ldt_copy(parent_task->i386_ldt); +#endif } else { pset = &default_pset; @@ -514,6 +539,19 @@ task_deallocate( zfree(task_zone, task); } +/* + * task_name_deallocate: + * + * Drop a reference on a task name. 
+ */ +void +task_name_deallocate( + task_name_t task_name) +{ + return(task_deallocate((task_t)task_name)); +} + + /* * task_terminate: * @@ -627,10 +665,11 @@ task_terminate_internal( ipc_space_destroy(task->itk_space); /* LP64todo - make this clean */ -#ifdef __ppc__ - vm_map_remove_commpage64(task->map); + vm_map_remove_commpage(task->map); pmap_unmap_sharedpage(task->map->pmap); /* Unmap commpage */ -#endif + + if (vm_map_has_4GB_pagezero(task->map)) + vm_map_clear_4GB_pagezero(task->map); /* * If the current thread is a member of the task diff --git a/osfmk/kern/task.h b/osfmk/kern/task.h index 050f66906..5e3948343 100644 --- a/osfmk/kern/task.h +++ b/osfmk/kern/task.h @@ -91,6 +91,7 @@ #include #include #include +#include #include #include @@ -148,6 +149,7 @@ struct task { /* IPC structures */ decl_mutex_data(,itk_lock_data) struct ipc_port *itk_self; /* not a right, doesn't hold ref */ + struct ipc_port *itk_nself; /* not a right, doesn't hold ref */ struct ipc_port *itk_sself; /* a send right */ struct exception_action exc_actions[EXC_TYPES_COUNT]; /* a send right each valid element */ @@ -169,6 +171,8 @@ struct task { struct ipc_port *paged_ledger_port; unsigned int priv_flags; /* privilege resource flags */ #define VM_BACKING_STORE_PRIV 0x1 + + MACHINE_TASK integer_t faults; /* faults counter */ integer_t pageins; /* pageins counter */ @@ -261,6 +265,7 @@ extern kern_return_t task_terminate_internal( extern kern_return_t task_create_internal( task_t parent_task, boolean_t inherit_memory, + boolean_t is_64bit, task_t *child_task); /* OUT */ extern kern_return_t task_importance( @@ -277,6 +282,7 @@ extern void task_backing_store_privileged( extern void task_working_set_disable( task_t task); + /* Get number of activations in a task */ extern int get_task_numacts( task_t task); @@ -304,6 +310,8 @@ extern task_t kernel_task; extern void task_deallocate( task_t task); +extern void task_name_deallocate( + task_name_t task_name); __END_DECLS #endif /* 
_KERN_TASK_H_ */ diff --git a/osfmk/kern/thread.c b/osfmk/kern/thread.c index 0c181bfb6..2d8cbc4be 100644 --- a/osfmk/kern/thread.c +++ b/osfmk/kern/thread.c @@ -774,11 +774,9 @@ kernel_thread_create( pset_unlock(task->processor_set); task_unlock(task); -#if !defined(i386) stack_alloc(thread); assert(thread->kernel_stack != 0); thread->reserved_stack = thread->kernel_stack; -#endif /* !defined(i386) */ thread->parameter = parameter; @@ -1134,12 +1132,10 @@ funnel_alloc( if (funnel_lck_grp == LCK_GRP_NULL) { funnel_lck_grp_attr = lck_grp_attr_alloc_init(); - //lck_grp_attr_setstat(funnel_lck_grp_attr); funnel_lck_grp = lck_grp_alloc_init("Funnel", funnel_lck_grp_attr); funnel_lck_attr = lck_attr_alloc_init(); - //lck_attr_setdebug(funnel_lck_attr); } if ((fnl = (funnel_t *)kalloc(sizeof(funnel_t))) != 0){ bzero((void *)fnl, sizeof(funnel_t)); diff --git a/osfmk/kern/thread.h b/osfmk/kern/thread.h index cde616313..bca2bc841 100644 --- a/osfmk/kern/thread.h +++ b/osfmk/kern/thread.h @@ -515,6 +515,9 @@ extern void machine_thread_init(void); extern kern_return_t machine_thread_create( thread_t thread, task_t task); +extern void machine_thread_switch_addrmode( + thread_t thread, + int oldmode_is64bit); extern void machine_thread_destroy( thread_t thread); @@ -640,6 +643,10 @@ extern void thread_setentrypoint( thread_t thread, mach_vm_offset_t entry); +extern void thread_setsinglestep( + thread_t thread, + int on); + extern kern_return_t thread_wire_internal( host_priv_t host_priv, thread_t thread, @@ -653,6 +660,9 @@ extern boolean_t is_thread_idle(thread_t); /* True is TH_IDLE */ extern kern_return_t thread_dup(thread_t); extern task_t get_threadtask(thread_t); +#define thread_is_64bit(thd) \ + task_has_64BitAddr(get_threadtask(thd)) + extern void *get_bsdthread_info(thread_t); extern void set_bsdthread_info(thread_t, void *); diff --git a/osfmk/kern/timer_call.c b/osfmk/kern/timer_call.c index fd48c3959..59b0a96b7 100644 --- a/osfmk/kern/timer_call.c +++ 
b/osfmk/kern/timer_call.c @@ -37,6 +37,8 @@ #include #include +#include + decl_simple_lock_data(static,timer_call_lock) static struct { @@ -119,7 +121,7 @@ void _set_delayed_call_timer( timer_call_t call) { - clock_set_timer_deadline(call->deadline); + etimer_set_deadline(call->deadline); } boolean_t @@ -284,6 +286,7 @@ timer_call_interrupt( call = TC(queue_first(queue)); while (!queue_end(queue, qe(call))) { + if (call->deadline <= timestamp) { timer_call_func_t func; timer_call_param_t param0, param1; @@ -296,8 +299,12 @@ timer_call_interrupt( simple_unlock(&timer_call_lock); + KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_EXCP_DECI, 2) | DBG_FUNC_START, (int)func, param0, param1, 0, 0); + (*func)(param0, param1); + KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_EXCP_DECI, 2) | DBG_FUNC_END, (int)func, param0, param1, 0, 0); + simple_lock(&timer_call_lock); } else @@ -310,4 +317,5 @@ timer_call_interrupt( _set_delayed_call_timer(call); simple_unlock(&timer_call_lock); + } diff --git a/osfmk/kern/wait_queue.c b/osfmk/kern/wait_queue.c index 8faa4013f..361cf7961 100644 --- a/osfmk/kern/wait_queue.c +++ b/osfmk/kern/wait_queue.c @@ -1059,7 +1059,11 @@ wait_queue_wakeup64_all_locked( queue_t q = &wake_queue_head; kern_return_t res; - assert(wait_queue_held(wq)); +// assert(wait_queue_held(wq)); + if(!wq->wq_interlock.lock_data) { /* (BRINGUP */ + panic("wait_queue_wakeup64_all_locked: lock not held on %08X\n", wq); /* (BRINGUP) */ + } + queue_init(q); /* @@ -1111,6 +1115,9 @@ wait_queue_wakeup_all( s = splsched(); wait_queue_lock(wq); + if(!wq->wq_interlock.lock_data) { /* (BRINGUP */ + panic("wait_queue_wakeup_all: we did not get the lock on %08X\n", wq); /* (BRINGUP) */ + } ret = wait_queue_wakeup64_all_locked( wq, (event64_t)((uint32_t)event), result, TRUE); diff --git a/osfmk/kern/zalloc.c b/osfmk/kern/zalloc.c index bb68d2236..0661d36a3 100644 --- a/osfmk/kern/zalloc.c +++ b/osfmk/kern/zalloc.c @@ -107,7 +107,7 @@ #else /* !defined(__alpha) */ #define 
is_kernel_data_addr(a) \ - (!(a) || ((a) >= VM_MIN_KERNEL_ADDRESS && !((a) & 0x3))) + (!(a) || ((a) >= vm_min_kernel_address && !((a) & 0x3))) #endif /* defined(__alpha) */ @@ -1227,7 +1227,7 @@ zone_gc(void) */ scan = (void *)z->free_elements; - (void *)z->free_elements = NULL; + z->free_elements = 0; unlock_zone(z); @@ -1271,7 +1271,7 @@ zone_gc(void) if (keep != NULL) { tail->next = (void *)z->free_elements; - (void *)z->free_elements = keep; + z->free_elements = (vm_offset_t) keep; tail = keep = NULL; } else { m =0; @@ -1283,7 +1283,7 @@ zone_gc(void) } if (m !=0 ) { prev->next = (void *)z->free_elements; - (void *)z->free_elements = (void *)base_elt; + z->free_elements = (vm_offset_t) base_elt; base_prev->next = elt; prev = base_prev; } @@ -1308,7 +1308,7 @@ zone_gc(void) lock_zone(z); tail->next = (void *)z->free_elements; - (void *)z->free_elements = keep; + z->free_elements = (vm_offset_t) keep; unlock_zone(z); } @@ -1361,7 +1361,7 @@ zone_gc(void) if (keep != NULL) { tail->next = (void *)z->free_elements; - (void *)z->free_elements = keep; + z->free_elements = (vm_offset_t) keep; } if (z->waiting) { @@ -1388,7 +1388,7 @@ zone_gc(void) if (keep != NULL) { tail->next = (void *)z->free_elements; - (void *)z->free_elements = keep; + z->free_elements = (vm_offset_t) keep; } } @@ -1474,7 +1474,7 @@ host_zone_info( #ifdef ppc max_zones = num_zones + 4; #else - max_zones = num_zones + 2; + max_zones = num_zones + 3; /* ATN: count the number below!! 
*/ #endif z = first_zone; simple_unlock(&all_zones_lock); @@ -1558,6 +1558,15 @@ host_zone_info( zn++; zi++; #endif + +#ifdef i386 + strcpy(zn->zn_name, "page_tables"); + pt_fake_zone_info(&zi->zi_count, &zi->zi_cur_size, &zi->zi_max_size, &zi->zi_elem_size, + &zi->zi_alloc_size, &zi->zi_collectable, &zi->zi_exhaustible); + zn++; + zi++; +#endif + strcpy(zn->zn_name, "kalloc.large"); kalloc_fake_zone_info(&zi->zi_count, &zi->zi_cur_size, &zi->zi_max_size, &zi->zi_elem_size, &zi->zi_alloc_size, &zi->zi_collectable, &zi->zi_exhaustible); diff --git a/osfmk/mach-o/loader.h b/osfmk/mach-o/loader.h index 2b979ee5b..0422c582e 100644 --- a/osfmk/mach-o/loader.h +++ b/osfmk/mach-o/loader.h @@ -63,7 +63,7 @@ struct mach_header { /* Constant for the magic field of the mach_header */ #define MH_MAGIC 0xfeedface /* the mach magic number */ -#define MH_CIGAM NXSwapInt(MH_MAGIC) +#define MH_CIGAM 0xcefaedfe /* * The layout of the file depends on the filetype. For all but the MH_OBJECT diff --git a/osfmk/mach/host_info.h b/osfmk/mach/host_info.h index 565a56e24..eb5367490 100644 --- a/osfmk/mach/host_info.h +++ b/osfmk/mach/host_info.h @@ -108,9 +108,7 @@ typedef struct host_basic_info_old *host_basic_info_old_t; (sizeof(host_basic_info_data_old_t)/sizeof(integer_t))) #endif -#if __DARWIN_ALIGN_POWER -#pragma options align=power -#endif +#pragma pack(4) struct host_basic_info { integer_t max_cpus; /* max number of CPUs possible */ @@ -126,9 +124,7 @@ struct host_basic_info { uint64_t max_mem; /* actual size of physical memory */ }; -#if __DARWIN_ALIGN_POWER -#pragma options align=reset -#endif +#pragma pack() typedef struct host_basic_info host_basic_info_data_t; typedef struct host_basic_info *host_basic_info_t; diff --git a/osfmk/mach/host_reboot.h b/osfmk/mach/host_reboot.h index efc17f980..076e28f05 100644 --- a/osfmk/mach/host_reboot.h +++ b/osfmk/mach/host_reboot.h @@ -26,7 +26,8 @@ #ifndef _MACH_HOST_REBOOT_ #define _MACH_HOST_REBOOT_ -#define HOST_REBOOT_HALT 0x8 +#define 
HOST_REBOOT_HALT 0x0008 +#define HOST_REBOOT_UPSDELAY 0x0100 #define HOST_REBOOT_DEBUGGER 0x1000 #endif /* _MACH_HOST_REBOOT_ */ diff --git a/osfmk/mach/i386/boolean.h b/osfmk/mach/i386/boolean.h index 78ae850d4..6dfd2d8fa 100644 --- a/osfmk/mach/i386/boolean.h +++ b/osfmk/mach/i386/boolean.h @@ -59,6 +59,10 @@ #ifndef _MACH_I386_BOOLEAN_H_ #define _MACH_I386_BOOLEAN_H_ +#if defined(__x86_64__) +typedef unsigned int boolean_t; +#else typedef int boolean_t; +#endif #endif /* _MACH_I386_BOOLEAN_H_ */ diff --git a/osfmk/mach/i386/exception.h b/osfmk/mach/i386/exception.h index 3a4e4e9dd..f39d6e6e7 100644 --- a/osfmk/mach/i386/exception.h +++ b/osfmk/mach/i386/exception.h @@ -82,6 +82,7 @@ #define EXC_I386_EXTERR 5 #define EXC_I386_EMERR 6 #define EXC_I386_BOUND 7 +#define EXC_I386_SSEEXTERR 8 /* * EXC_SOFTWARE diff --git a/osfmk/mach/i386/fp_reg.h b/osfmk/mach/i386/fp_reg.h index 9721e52cb..3ab06f2b0 100644 --- a/osfmk/mach/i386/fp_reg.h +++ b/osfmk/mach/i386/fp_reg.h @@ -53,32 +53,9 @@ #ifndef _I386_FP_SAVE_H_ #define _I386_FP_SAVE_H_ -/* - * Floating point registers and status, as saved - * and restored by FP save/restore instructions. - */ -struct i386_fp_save { - unsigned short fp_control; /* control */ - unsigned short fp_unused_1; - unsigned short fp_status; /* status */ - unsigned short fp_unused_2; - unsigned short fp_tag; /* register tags */ - unsigned short fp_unused_3; - unsigned int fp_eip; /* eip at failed instruction */ - unsigned short fp_cs; /* cs at failed instruction */ - unsigned short fp_opcode; /* opcode of failed instruction */ - unsigned int fp_dp; /* data address */ - unsigned short fp_ds; /* data segment */ - unsigned short fp_unused_4; -}; - -struct i386_fp_regs { - unsigned short fp_reg_word[5][8]; - /* space for 8 80-bit FP registers */ -}; /* note when allocating this data structure, it must be 16 byte aligned. 
*/ -struct i386_fx_save { +struct x86_fx_save { unsigned short fx_control; /* control */ unsigned short fx_status; /* status */ unsigned char fx_tag; /* register tags */ @@ -92,9 +69,10 @@ struct i386_fx_save { unsigned short fx_bbz3; /* better be zero when calling fxrtstor */ unsigned int fx_MXCSR; unsigned int fx_MXCSR_MASK; - unsigned short fx_reg_word[8][8]; /* STx/MMx registers */ - unsigned short fx_XMM_reg[8][8]; /* XMM0-XMM7 */ - unsigned char fx_reserved[16*14]; /* reserved by intel for future expansion */ + unsigned short fx_reg_word[8][8]; /* STx/MMx registers */ + unsigned short fx_XMM_reg[8][16]; /* XMM0-XMM15 on 64 bit processors */ + /* XMM0-XMM7 on 32 bit processors... unused storage reserved */ + unsigned char fx_reserved[16*6]; /* reserved by intel for future expansion */ }; diff --git a/osfmk/mach/i386/machine_types.defs b/osfmk/mach/i386/machine_types.defs index d6cb4af54..4132ef6e3 100644 --- a/osfmk/mach/i386/machine_types.defs +++ b/osfmk/mach/i386/machine_types.defs @@ -52,8 +52,13 @@ type double = MACH_MSG_TYPE_REAL_64; * a port in user space as an integer and * in kernel space as a pointer. */ +#if defined(__LP64__) +type uintptr_t = uint64_t; +type intptr_t = int64_t; +#else type uintptr_t = uint32_t; type intptr_t = int32_t; +#endif /* * These are the legacy Mach types that are @@ -61,7 +66,11 @@ type intptr_t = int32_t; * They were defined in terms of int, not * long int, so they remain separate. */ +#if defined(__LP64__) +type register_t = int64_t; +#else type register_t = int32_t; +#endif type integer_t = int32_t; type natural_t = uint32_t; @@ -69,18 +78,25 @@ type natural_t = uint32_t; * These are the VM types that scale with the address * space size of a given process. 
*/ + +#if defined(__LP64__) +type vm_address_t = uint64_t; +type vm_offset_t = uint64_t; +type vm_size_t = uint64_t; +#else type vm_address_t = natural_t; type vm_offset_t = natural_t; type vm_size_t = natural_t; +#endif /* * The mach_vm_xxx_t types are sized to hold the * maximum pointer, offset, etc... supported on the * platform. */ -type mach_vm_address_t = uint32_t; -type mach_vm_offset_t = uint32_t; -type mach_vm_size_t = uint32_t; +type mach_vm_address_t = uint64_t; +type mach_vm_offset_t = uint64_t; +type mach_vm_size_t = uint64_t; #if MACH_IPC_COMPAT /* diff --git a/osfmk/mach/i386/rpc.h b/osfmk/mach/i386/rpc.h index c1d3aa404..e558e6fa7 100644 --- a/osfmk/mach/i386/rpc.h +++ b/osfmk/mach/i386/rpc.h @@ -26,5 +26,5 @@ #ifndef _MACH_I386_RPC_H_ #define _MACH_I386_RPC_H_ -#endif _MACH_I386_RPC_H_ +#endif /* _MACH_I386_RPC_H_ */ diff --git a/osfmk/mach/i386/syscall_sw.h b/osfmk/mach/i386/syscall_sw.h index 1ee827e0f..b0a4affcc 100644 --- a/osfmk/mach/i386/syscall_sw.h +++ b/osfmk/mach/i386/syscall_sw.h @@ -57,14 +57,103 @@ #include -#define MACHCALLSEL $0x07 +#if defined(__i386__) +/* + * Software interrupt codes for 32-bit system call entry: + */ +#define UNIX_INT 0x80 +#define MACH_INT 0x81 +#define MACHDEP_INT 0x82 +#define DIAG_INT 0x83 + +#ifndef KERNEL +/* + * Syscall entry macros for use in libc: + * [Note that the nop padding is temporary during 4/4 transition.] 
+ */ +#define SYSENTER_PAD nop;nop; +#define SYSCALL_PAD nop;nop;nop;nop;nop; +#define UNIX_SYSCALL_TRAP \ + SYSCALL_PAD \ + int $(UNIX_INT) +#define MACHDEP_SYSCALL_TRAP \ + SYSCALL_PAD \ + int $(MACHDEP_INT) + +/* + * Macro to generate Mach call stubs in libc: + */ +#define kernel_trap(trap_name,trap_number,number_args) \ +LEAF(_##trap_name,0) ;\ + movl $##trap_number,%eax ;\ + int $(MACH_INT) ;\ +END(_##trap_name) + +#endif +#endif /* defined(__i386__) */ +#if defined(__x86_64__) + +#ifndef KERNEL + +#define UNIX_SYSCALL_TRAP \ + syscall +#define MACHDEP_SYSCALL_TRAP \ + syscall + +/* + * Macro to generate Mach call stubs in Libc. + * Existing calls use negative numbers for Mach traps, so + * until we change those and change the 32-bit kernel_trap + * macro above, we negate those numbers here for the 64-bit + * code path. + */ #define kernel_trap(trap_name,trap_number,number_args) \ LEAF(_##trap_name,0) ;\ - movl $##trap_number,%eax ;\ - lcall MACHCALLSEL, $0 ;\ + movq %rcx, %r10 ;\ + movl $ SYSCALL_CONSTRUCT_MACH(-##trap_number), %eax ;\ + syscall ;\ END(_##trap_name) +#endif +#endif /* defined(__x86_64__) */ + +/* + * Syscall classes for 64-bit system call entry. + * For 64-bit users, the 32-bit syscall number is partitioned + * with the high-order bits representing the class and low-order + * bits being the syscall number within that class. + * The high-order 32-bits of the 64-bit syscall number are unused. + * All system classes enter the kernel via the syscall instruction. + * + * These are not #ifdef'd for x86-64 because they might be used for + * 32-bit someday and so the 64-bit comm page in a 32-bit kernel + * can use them. 
+ */ +#define SYSCALL_CLASS_SHIFT 24 +#define SYSCALL_CLASS_MASK (0xFF << SYSCALL_CLASS_SHIFT) +#define SYSCALL_NUMBER_MASK (~SYSCALL_CLASS_MASK) + +#define SYSCALL_CLASS_NONE 0 /* Invalid */ +#define SYSCALL_CLASS_MACH 1 /* Mach */ +#define SYSCALL_CLASS_UNIX 2 /* Unix/BSD */ +#define SYSCALL_CLASS_MDEP 3 /* Machine-dependent */ +#define SYSCALL_CLASS_DIAG 4 /* Diagnostics */ + +/* Macros to simpllfy constructing syscall numbers. */ +#define SYSCALL_CONSTRUCT_MACH(syscall_number) \ + ((SYSCALL_CLASS_MACH << SYSCALL_CLASS_SHIFT) | \ + (SYSCALL_NUMBER_MASK & (syscall_number))) +#define SYSCALL_CONSTRUCT_UNIX(syscall_number) \ + ((SYSCALL_CLASS_UNIX << SYSCALL_CLASS_SHIFT) | \ + (SYSCALL_NUMBER_MASK & (syscall_number))) +#define SYSCALL_CONSTRUCT_MDEP(syscall_number) \ + ((SYSCALL_CLASS_MDEP << SYSCALL_CLASS_SHIFT) | \ + (SYSCALL_NUMBER_MASK & (syscall_number))) +#define SYSCALL_CONSTRUCT_DIAG(syscall_number) \ + ((SYSCALL_CLASS_DIAG << SYSCALL_CLASS_SHIFT) | \ + (SYSCALL_NUMBER_MASK & (syscall_number))) + #endif /* _MACH_I386_SYSCALL_SW_H_ */ #endif /* PRIVATE */ diff --git a/osfmk/mach/i386/thread_state.h b/osfmk/mach/i386/thread_state.h index c043c69d0..2eb5259d1 100644 --- a/osfmk/mach/i386/thread_state.h +++ b/osfmk/mach/i386/thread_state.h @@ -26,9 +26,10 @@ #ifndef _MACH_I386_THREAD_STATE_H_ #define _MACH_I386_THREAD_STATE_H_ -#define I386_THREAD_STATE_MAX 144 +/* Size of maximum exported thread state in words */ +#define I386_THREAD_STATE_MAX (144) /* Size of biggest state possible */ -#if defined (__i386__) +#if defined (__i386__) || defined(__x86_64__) #define THREAD_STATE_MAX I386_THREAD_STATE_MAX #endif diff --git a/osfmk/mach/i386/thread_status.h b/osfmk/mach/i386/thread_status.h index b41e6e7b7..499d0cb48 100644 --- a/osfmk/mach/i386/thread_status.h +++ b/osfmk/mach/i386/thread_status.h @@ -64,35 +64,58 @@ #include #include #include -#include /* FIXME */ -#include /* FIXME */ +#include + + + /* - * i386_thread_state this is the structure that is 
exported - * to user threads for use in status/mutate - * calls. This structure should never - * change. - * - * i386_float_state exported to use threads for access to - * floating point registers. Try not to - * change this one, either. + * the i386_xxxx form is kept for legacy purposes since these types + * are externally known... eventually they should be deprecated. + * our internal implementation has moved to the following naming convention * - * i386_isa_port_map_state exported to user threads to allow - * selective in/out operations - * - * i386_v86_assist_state - * - * thread_syscall_state + * x86_xxxx32 names are used to deal with 32 bit states + * x86_xxxx64 names are used to deal with 64 bit states + * x86_xxxx names are used to deal with either 32 or 64 bit states + * via a self-describing mechanism + */ + + + +/* + * these are the legacy names which should be deprecated in the future + * they are externally known which is the only reason we don't just get + * rid of them + */ +#define i386_THREAD_STATE 1 +#define i386_FLOAT_STATE 2 +#define i386_EXCEPTION_STATE 3 + + +/* + * THREAD_STATE_FLAVOR_LIST 0 + * these are the supported flavors */ +#define x86_THREAD_STATE32 1 +#define x86_FLOAT_STATE32 2 +#define x86_EXCEPTION_STATE32 3 +#define x86_THREAD_STATE64 4 +#define x86_FLOAT_STATE64 5 +#define x86_EXCEPTION_STATE64 6 +#define x86_THREAD_STATE 7 +#define x86_FLOAT_STATE 8 +#define x86_EXCEPTION_STATE 9 +#define x86_DEBUG_STATE32 10 +#define x86_DEBUG_STATE64 11 +#define x86_DEBUG_STATE 12 +#define THREAD_STATE_NONE 13 -/* THREAD_STATE_FLAVOR_LIST 0 */ -#define i386_NEW_THREAD_STATE 1 /* used to be i386_THREAD_STATE */ -#define i386_FLOAT_STATE 2 -#define i386_ISA_PORT_MAP_STATE 3 -#define i386_V86_ASSIST_STATE 4 -#define i386_REGS_SEGS_STATE 5 -#define THREAD_SYSCALL_STATE 6 -#define THREAD_STATE_NONE 7 -#define i386_SAVED_STATE 8 + + +/* + * Largest state on this machine: + * (be sure mach/machine/thread_state.h matches!) 
+ */ +#define THREAD_MACHINE_STATE_MAX THREAD_STATE_MAX /* @@ -101,48 +124,442 @@ * platform. The macro must be manually updated to include all of the valid * exception flavors as defined above. */ -#define VALID_THREAD_STATE_FLAVOR(x) \ - ((x == i386_NEW_THREAD_STATE) || \ - (x == i386_FLOAT_STATE) || \ - (x == i386_ISA_PORT_MAP_STATE) || \ - (x == i386_V86_ASSIST_STATE) || \ - (x == i386_REGS_SEGS_STATE) || \ - (x == THREAD_SYSCALL_STATE) || \ - (x == THREAD_STATE_NONE) || \ - (x == i386_SAVED_STATE)) +#define VALID_THREAD_STATE_FLAVOR(x) \ + ((x == x86_THREAD_STATE32) || \ + (x == x86_FLOAT_STATE32) || \ + (x == x86_EXCEPTION_STATE32) || \ + (x == x86_DEBUG_STATE32) || \ + (x == x86_THREAD_STATE64) || \ + (x == x86_FLOAT_STATE64) || \ + (x == x86_EXCEPTION_STATE64) || \ + (x == x86_DEBUG_STATE64) || \ + (x == x86_THREAD_STATE) || \ + (x == x86_FLOAT_STATE) || \ + (x == x86_EXCEPTION_STATE) || \ + (x == x86_DEBUG_STATE) || \ + (x == THREAD_STATE_NONE)) + + + +struct x86_state_hdr { + int flavor; + int count; +}; +typedef struct x86_state_hdr x86_state_hdr_t; + /* - * This structure is used for both - * i386_THREAD_STATE and i386_REGS_SEGS_STATE. + * Main thread state consists of + * general registers, segment registers, + * eip and eflags. 
*/ -struct i386_new_thread_state { - unsigned int gs; - unsigned int fs; - unsigned int es; - unsigned int ds; - unsigned int edi; - unsigned int esi; - unsigned int ebp; - unsigned int esp; - unsigned int ebx; - unsigned int edx; - unsigned int ecx; - unsigned int eax; + +struct i386_thread_state { + unsigned int eax; + unsigned int ebx; + unsigned int ecx; + unsigned int edx; + unsigned int edi; + unsigned int esi; + unsigned int ebp; + unsigned int esp; + unsigned int ss; + unsigned int eflags; + unsigned int eip; + unsigned int cs; + unsigned int ds; + unsigned int es; + unsigned int fs; + unsigned int gs; +} ; + +/* + * to be depecrated in the future + */ +typedef struct i386_thread_state i386_thread_state_t; +#define i386_THREAD_STATE_COUNT ((mach_msg_type_number_t) \ + ( sizeof (i386_thread_state_t) / sizeof (int) )) + + +typedef struct i386_thread_state x86_thread_state32_t; +#define x86_THREAD_STATE32_COUNT ((mach_msg_type_number_t) \ + ( sizeof (x86_thread_state32_t) / sizeof (int) )) + + + + +struct x86_thread_state64 { + uint64_t rax; + uint64_t rbx; + uint64_t rcx; + uint64_t rdx; + uint64_t rdi; + uint64_t rsi; + uint64_t rbp; + uint64_t rsp; + uint64_t r8; + uint64_t r9; + uint64_t r10; + uint64_t r11; + uint64_t r12; + uint64_t r13; + uint64_t r14; + uint64_t r15; + uint64_t rip; + uint64_t rflags; + uint64_t cs; + uint64_t fs; + uint64_t gs; +} ; + + +typedef struct x86_thread_state64 x86_thread_state64_t; +#define x86_THREAD_STATE64_COUNT ((mach_msg_type_number_t) \ + ( sizeof (x86_thread_state64_t) / sizeof (int) )) + + + + +struct x86_thread_state { + x86_state_hdr_t tsh; + union { + x86_thread_state32_t ts32; + x86_thread_state64_t ts64; + } uts; +} ; + + +typedef struct x86_thread_state x86_thread_state_t; +#define x86_THREAD_STATE_COUNT ((mach_msg_type_number_t) \ + ( sizeof (x86_thread_state_t) / sizeof (int) )) + + + +/* + * Default segment register values. 
+ */ + +#define USER_CODE_SELECTOR 0x0017 +#define USER_DATA_SELECTOR 0x001f +#define KERN_CODE_SELECTOR 0x0008 +#define KERN_DATA_SELECTOR 0x0010 + +typedef struct fp_control { + unsigned short invalid :1, + denorm :1, + zdiv :1, + ovrfl :1, + undfl :1, + precis :1, + :2, + pc :2, +#define FP_PREC_24B 0 +#define FP_PREC_53B 2 +#define FP_PREC_64B 3 + rc :2, +#define FP_RND_NEAR 0 +#define FP_RND_DOWN 1 +#define FP_RND_UP 2 +#define FP_CHOP 3 + /*inf*/ :1, + :3; +} fp_control_t; +/* + * Status word. + */ + +typedef struct fp_status { + unsigned short invalid :1, + denorm :1, + zdiv :1, + ovrfl :1, + undfl :1, + precis :1, + stkflt :1, + errsumm :1, + c0 :1, + c1 :1, + c2 :1, + tos :3, + c3 :1, + busy :1; +} fp_status_t; + +/* defn of 80bit x87 FPU or MMX register */ +struct mmst_reg { + char mmst_reg[10]; + char mmst_rsrv[6]; +}; + + +/* defn of 128 bit XMM regs */ +struct xmm_reg { + char xmm_reg[16]; +}; + +/* + * Floating point state. + */ + +#define FP_STATE_BYTES 512 /* number of chars worth of data from fpu_fcw */ + +/* For legacy reasons we need to leave the hw_state as char bytes */ +struct i386_float_state { + int fpu_reserved[2]; + fp_control_t fpu_fcw; /* x87 FPU control word */ + fp_status_t fpu_fsw; /* x87 FPU status word */ + uint8_t fpu_ftw; /* x87 FPU tag word */ + uint8_t fpu_rsrv1; /* reserved */ + uint16_t fpu_fop; /* x87 FPU Opcode */ + uint32_t fpu_ip; /* x87 FPU Instruction Pointer offset */ + uint16_t fpu_cs; /* x87 FPU Instruction Pointer Selector */ + uint16_t fpu_rsrv2; /* reserved */ + uint32_t fpu_dp; /* x87 FPU Instruction Operand(Data) Pointer offset */ + uint16_t fpu_ds; /* x87 FPU Instruction Operand(Data) Pointer Selector */ + uint16_t fpu_rsrv3; /* reserved */ + uint32_t fpu_mxcsr; /* MXCSR Register state */ + uint32_t fpu_mxcsrmask; /* MXCSR mask */ + struct mmst_reg fpu_stmm0; /* ST0/MM0 */ + struct mmst_reg fpu_stmm1; /* ST1/MM1 */ + struct mmst_reg fpu_stmm2; /* ST2/MM2 */ + struct mmst_reg fpu_stmm3; /* ST3/MM3 */ + struct 
mmst_reg fpu_stmm4; /* ST4/MM4 */ + struct mmst_reg fpu_stmm5; /* ST5/MM5 */ + struct mmst_reg fpu_stmm6; /* ST6/MM6 */ + struct mmst_reg fpu_stmm7; /* ST7/MM7 */ + struct xmm_reg fpu_xmm0; /* XMM 0 */ + struct xmm_reg fpu_xmm1; /* XMM 1 */ + struct xmm_reg fpu_xmm2; /* XMM 2 */ + struct xmm_reg fpu_xmm3; /* XMM 3 */ + struct xmm_reg fpu_xmm4; /* XMM 4 */ + struct xmm_reg fpu_xmm5; /* XMM 5 */ + struct xmm_reg fpu_xmm6; /* XMM 6 */ + struct xmm_reg fpu_xmm7; /* XMM 7 */ + char fpu_rsrv4[14*16]; /* reserved */ + int fpu_reserved1; +}; + + +/* + * to be depecrated in the future + */ +typedef struct i386_float_state i386_float_state_t; +#define i386_FLOAT_STATE_COUNT ((mach_msg_type_number_t) \ + (sizeof(i386_float_state_t)/sizeof(unsigned int))) + +typedef struct i386_float_state x86_float_state32_t; +#define x86_FLOAT_STATE32_COUNT ((mach_msg_type_number_t) \ + (sizeof(x86_float_state32_t)/sizeof(unsigned int))) + + +struct x86_float_state64 { + int fpu_reserved[2]; + fp_control_t fpu_fcw; /* x87 FPU control word */ + fp_status_t fpu_fsw; /* x87 FPU status word */ + uint8_t fpu_ftw; /* x87 FPU tag word */ + uint8_t fpu_rsrv1; /* reserved */ + uint16_t fpu_fop; /* x87 FPU Opcode */ + uint32_t fpu_ip; /* x87 FPU Instruction Pointer offset */ + uint16_t fpu_cs; /* x87 FPU Instruction Pointer Selector */ + uint16_t fpu_rsrv2; /* reserved */ + uint32_t fpu_dp; /* x87 FPU Instruction Operand(Data) Pointer offset */ + uint16_t fpu_ds; /* x87 FPU Instruction Operand(Data) Pointer Selector */ + uint16_t fpu_rsrv3; /* reserved */ + uint32_t fpu_mxcsr; /* MXCSR Register state */ + uint32_t fpu_mxcsrmask; /* MXCSR mask */ + struct mmst_reg fpu_stmm0; /* ST0/MM0 */ + struct mmst_reg fpu_stmm1; /* ST1/MM1 */ + struct mmst_reg fpu_stmm2; /* ST2/MM2 */ + struct mmst_reg fpu_stmm3; /* ST3/MM3 */ + struct mmst_reg fpu_stmm4; /* ST4/MM4 */ + struct mmst_reg fpu_stmm5; /* ST5/MM5 */ + struct mmst_reg fpu_stmm6; /* ST6/MM6 */ + struct mmst_reg fpu_stmm7; /* ST7/MM7 */ + struct xmm_reg 
fpu_xmm0; /* XMM 0 */ + struct xmm_reg fpu_xmm1; /* XMM 1 */ + struct xmm_reg fpu_xmm2; /* XMM 2 */ + struct xmm_reg fpu_xmm3; /* XMM 3 */ + struct xmm_reg fpu_xmm4; /* XMM 4 */ + struct xmm_reg fpu_xmm5; /* XMM 5 */ + struct xmm_reg fpu_xmm6; /* XMM 6 */ + struct xmm_reg fpu_xmm7; /* XMM 7 */ + struct xmm_reg fpu_xmm8; /* XMM 8 */ + struct xmm_reg fpu_xmm9; /* XMM 9 */ + struct xmm_reg fpu_xmm10; /* XMM 10 */ + struct xmm_reg fpu_xmm11; /* XMM 11 */ + struct xmm_reg fpu_xmm12; /* XMM 12 */ + struct xmm_reg fpu_xmm13; /* XMM 13 */ + struct xmm_reg fpu_xmm14; /* XMM 14 */ + struct xmm_reg fpu_xmm15; /* XMM 15 */ + char fpu_rsrv4[6*16]; /* reserved */ + int fpu_reserved1; +}; + +typedef struct x86_float_state64 x86_float_state64_t; +#define x86_FLOAT_STATE64_COUNT ((mach_msg_type_number_t) \ + (sizeof(x86_float_state64_t)/sizeof(unsigned int))) + + + + +struct x86_float_state { + x86_state_hdr_t fsh; + union { + x86_float_state32_t fs32; + x86_float_state64_t fs64; + } ufs; +} ; + + +typedef struct x86_float_state x86_float_state_t; +#define x86_FLOAT_STATE_COUNT ((mach_msg_type_number_t) \ + ( sizeof (x86_float_state_t) / sizeof (int) )) + + + +/* + * Extra state that may be + * useful to exception handlers. 
+ */ + +struct i386_exception_state { + unsigned int trapno; + unsigned int err; + unsigned int faultvaddr; +}; + +/* + * to be deprecated in the future + */ +typedef struct i386_exception_state i386_exception_state_t; +#define i386_EXCEPTION_STATE_COUNT ((mach_msg_type_number_t) \ + ( sizeof (i386_exception_state_t) / sizeof (int) )) + +#define I386_EXCEPTION_STATE_COUNT i386_EXCEPTION_STATE_COUNT + +typedef struct i386_exception_state x86_exception_state32_t; +#define x86_EXCEPTION_STATE32_COUNT ((mach_msg_type_number_t) \ + ( sizeof (x86_exception_state32_t) / sizeof (int) )) + +struct x86_debug_state32 { + unsigned int dr0; + unsigned int dr1; + unsigned int dr2; + unsigned int dr3; + unsigned int dr4; + unsigned int dr5; + unsigned int dr6; + unsigned int dr7; +}; + +typedef struct x86_debug_state32 x86_debug_state32_t; +#define x86_DEBUG_STATE32_COUNT ((mach_msg_type_number_t) \ + ( sizeof (x86_debug_state32_t) / sizeof (int) )) +#define X86_DEBUG_STATE32_COUNT x86_DEBUG_STATE32_COUNT + + +struct x86_exception_state64 { + unsigned int trapno; + unsigned int err; + uint64_t faultvaddr; +}; + +typedef struct x86_exception_state64 x86_exception_state64_t; +#define x86_EXCEPTION_STATE64_COUNT ((mach_msg_type_number_t) \ + ( sizeof (x86_exception_state64_t) / sizeof (int) )) + + +struct x86_debug_state64 { + uint64_t dr0; + uint64_t dr1; + uint64_t dr2; + uint64_t dr3; + uint64_t dr4; + uint64_t dr5; + uint64_t dr6; + uint64_t dr7; +}; + + +typedef struct x86_debug_state64 x86_debug_state64_t; +#define x86_DEBUG_STATE64_COUNT ((mach_msg_type_number_t) \ + ( sizeof (x86_debug_state64_t) / sizeof (int) )) + +#define X86_DEBUG_STATE64_COUNT x86_DEBUG_STATE64_COUNT + + + +struct x86_exception_state { + x86_state_hdr_t esh; + union { + x86_exception_state32_t es32; + x86_exception_state64_t es64; + } ues; +} ; + + +typedef struct x86_exception_state x86_exception_state_t; +#define x86_EXCEPTION_STATE_COUNT ((mach_msg_type_number_t) \ + ( sizeof (x86_exception_state_t) 
/ sizeof (int) )) + +struct x86_debug_state { + x86_state_hdr_t dsh; + union { + x86_debug_state32_t ds32; + x86_debug_state64_t ds64; + } uds; +}; + + + +typedef struct x86_debug_state x86_debug_state_t; +#define x86_DEBUG_STATE_COUNT ((mach_msg_type_number_t) \ + (sizeof(x86_debug_state_t)/sizeof(unsigned int))) + +/* + * Machine-independent way for servers and Mach's exception mechanism to + * choose the most efficient state flavor for exception RPC's: + */ +#define MACHINE_THREAD_STATE x86_THREAD_STATE +#define MACHINE_THREAD_STATE_COUNT x86_THREAD_STATE_COUNT + + +#ifdef XNU_KERNEL_PRIVATE + +#define x86_SAVED_STATE32 THREAD_STATE_NONE + 1 +#define x86_SAVED_STATE64 THREAD_STATE_NONE + 2 + +#define OLD_i386_THREAD_STATE -1 + + +/* + * when reloading the segment registers on + * a return out of the kernel, we may take + * a GeneralProtection or SegmentNotPresent + * fault if one or more of the segment + * registers in the saved state was improperly + * specified via an x86_THREAD_STATE32 call + * the frame we push on top of the existing + * save area looks like this... we need to + * carry this as part of the save area + * in case we get hit so that we have a big + * enough stack + */ +struct x86_seg_load_fault32 { + unsigned int trapno; + unsigned int err; unsigned int eip; unsigned int cs; unsigned int efl; - unsigned int uesp; - unsigned int ss; }; -#define i386_NEW_THREAD_STATE_COUNT ((mach_msg_type_number_t) \ - (sizeof (struct i386_new_thread_state)/sizeof(unsigned int))) + /* * Subset of saved state stored by processor on kernel-to-kernel * trap. (Used by ddb to examine state guaranteed to be present * on all traps into debugger.) 
*/ -struct i386_saved_state_from_kernel { +struct x86_saved_state32_from_kernel { unsigned int gs; unsigned int fs; unsigned int es; @@ -150,8 +567,7 @@ struct i386_saved_state_from_kernel { unsigned int edi; unsigned int esi; unsigned int ebp; - unsigned int esp; /* kernel esp stored by pusha - - we save cr2 here later */ + unsigned int cr2; /* kernel esp stored by pusha - we save cr2 here later */ unsigned int ebx; unsigned int edx; unsigned int ecx; @@ -168,7 +584,8 @@ struct i386_saved_state_from_kernel { * state flavor is most efficient for exception RPC's to kernel-loaded * servers, because copying can be avoided: */ -struct i386_saved_state { + +struct x86_saved_state32 { unsigned int gs; unsigned int fs; unsigned int es; @@ -176,8 +593,7 @@ struct i386_saved_state { unsigned int edi; unsigned int esi; unsigned int ebp; - unsigned int esp; /* kernel esp stored by pusha - - we save cr2 here later */ + unsigned int cr2; /* kernel esp stored by pusha - we save cr2 here later */ unsigned int ebx; unsigned int edx; unsigned int ecx; @@ -189,206 +605,175 @@ struct i386_saved_state { unsigned int efl; unsigned int uesp; unsigned int ss; - struct v86_segs { - unsigned int v86_es; /* virtual 8086 segment registers */ - unsigned int v86_ds; - unsigned int v86_fs; - unsigned int v86_gs; - } v86_segs; -#define i386_SAVED_ARGV_COUNT 7 - unsigned int argv_status; /* Boolean flag indicating whether or - * not Mach copied in the args */ - unsigned int argv[i386_SAVED_ARGV_COUNT]; - /* The return address, and the first several - * function call args from the stack, for - * efficient syscall exceptions */ }; -#define i386_SAVED_STATE_COUNT ((mach_msg_type_number_t) \ - (sizeof (struct i386_saved_state)/sizeof(unsigned int))) -#define i386_REGS_SEGS_STATE_COUNT i386_SAVED_STATE_COUNT +typedef struct x86_saved_state32 x86_saved_state32_t; -/* - * Machine-independent way for servers and Mach's exception mechanism to - * choose the most efficient state flavor for exception RPC's: 
- */ -#define MACHINE_THREAD_STATE i386_SAVED_STATE -#define MACHINE_THREAD_STATE_COUNT 144 +#define x86_SAVED_STATE32_COUNT ((mach_msg_type_number_t) \ + (sizeof (x86_saved_state32_t)/sizeof(unsigned int))) -/* - * Largest state on this machine: - * (be sure mach/machine/thread_state.h matches!) - */ -#define THREAD_MACHINE_STATE_MAX THREAD_STATE_MAX - -/* - * Floating point state. - * - * fpkind tells in what way floating point operations are supported. - * See the values for fp_kind in . - * - * If the kind is FP_NO, then calls to set the state will fail, and - * thread_getstatus will return garbage for the rest of the state. - * If "initialized" is false, then the rest of the state is garbage. - * Clients can set "initialized" to false to force the coprocessor to - * be reset. - * "exc_status" is non-zero if the thread has noticed (but not - * proceeded from) a coprocessor exception. It contains the status - * word with the exception bits set. The status word in "fp_status" - * will have the exception bits turned off. If an exception bit in - * "fp_status" is turned on, then "exc_status" should be zero. This - * happens when the coprocessor exception is noticed after the system - * has context switched to some other thread. - * - * If kind is FP_387, then "state" is a i387_state. Other kinds might - * also use i387_state, but somebody will have to verify it (XXX). - * Note that the registers are ordered from top-of-stack down, not - * according to physical register number. 
- */ - -#define FP_STATE_BYTES 512 - -struct i386_float_state { - int fpkind; /* FP_NO..FP_387 (readonly) */ - int initialized; - unsigned char hw_state[FP_STATE_BYTES]; /* actual "hardware" state */ - int exc_status; /* exception status (readonly) */ +struct x86_saved_state32_tagged { + uint32_t tag; + struct x86_saved_state32 state; }; -#define i386_FLOAT_STATE_COUNT ((mach_msg_type_number_t) \ - (sizeof(struct i386_float_state)/sizeof(unsigned int))) - - -#define FP_old_STATE_BYTES ((mach_msg_type_number_t) \ - (sizeof (struct i386_fp_save) + sizeof (struct i386_fp_regs))) - -struct i386_old_float_state { - int fpkind; /* FP_NO..FP_387 (readonly) */ - int initialized; - unsigned char hw_state[FP_old_STATE_BYTES]; /* actual "hardware" state */ - int exc_status; /* exception status (readonly) */ +typedef struct x86_saved_state32_tagged x86_saved_state32_tagged_t; + +struct x86_sframe32 { + /* + * in case we throw a fault reloading + * segment registers on a return out of + * the kernel... the 'slf' state is only kept + * long enough to rejigger (i.e. restore + * the save area to its original state) + * the save area and throw the appropriate + * kernel trap pointing to the 'ssf' state + */ + struct x86_seg_load_fault32 slf; + struct x86_saved_state32_tagged ssf; }; -#define i386_old_FLOAT_STATE_COUNT ((mach_msg_type_number_t) \ - (sizeof(struct i386_old_float_state)/sizeof(unsigned int))) +typedef struct x86_sframe32 x86_sframe32_t; -#define PORT_MAP_BITS 0x400 -struct i386_isa_port_map_state { - unsigned char pm[PORT_MAP_BITS>>3]; -}; - -#define i386_ISA_PORT_MAP_STATE_COUNT ((mach_msg_type_number_t) \ - (sizeof(struct i386_isa_port_map_state)/sizeof(unsigned int))) /* - * V8086 assist supplies a pointer to an interrupt - * descriptor table in task space. + * This is the state pushed onto the 64-bit interrupt stack + * on any exception/trap/interrupt. 
*/ -struct i386_v86_assist_state { - unsigned int int_table; /* interrupt table address */ - int int_count; /* interrupt table size */ -}; - -struct v86_interrupt_table { - unsigned int count; /* count of pending interrupts */ - unsigned short mask; /* ignore this interrupt if true */ - unsigned short vec; /* vector to take */ +struct x86_64_intr_stack_frame { + uint32_t trapno; + uint32_t trapfn; + uint64_t err; + uint64_t rip; + uint64_t cs; + uint64_t rflags; + uint64_t rsp; + uint64_t ss; }; - -#define i386_V86_ASSIST_STATE_COUNT ((mach_msg_type_number_t) \ - (sizeof(struct i386_v86_assist_state)/sizeof(unsigned int))) - -struct thread_syscall_state { - unsigned eax; - unsigned edx; - unsigned efl; - unsigned eip; - unsigned esp; -}; - -#define i386_THREAD_SYSCALL_STATE_COUNT ((mach_msg_type_number_t) \ - (sizeof(struct thread_syscall_state) / sizeof(unsigned int))) +typedef struct x86_64_intr_stack_frame x86_64_intr_stack_frame_t; /* - * Main thread state consists of - * general registers, segment registers, - * eip and eflags. + * This defines the state saved before entry into compatibility mode. + * The machine state is pushed automatically and the compat state is + * synthesized in the exception handling code. 
*/ +struct x86_saved_state_compat32 { + struct x86_saved_state32_tagged iss32; + uint32_t pad_for_16byte_alignment[2]; + struct x86_64_intr_stack_frame isf64; +}; +typedef struct x86_saved_state_compat32 x86_saved_state_compat32_t; -#define i386_THREAD_STATE -1 - -typedef struct { - unsigned int eax; - unsigned int ebx; - unsigned int ecx; - unsigned int edx; - unsigned int edi; - unsigned int esi; - unsigned int ebp; - unsigned int esp; - unsigned int ss; - unsigned int eflags; - unsigned int eip; - unsigned int cs; - unsigned int ds; - unsigned int es; - unsigned int fs; - unsigned int gs; -} i386_thread_state_t; - -#define i386_THREAD_STATE_COUNT ((mach_msg_type_number_t) \ - ( sizeof (i386_thread_state_t) / sizeof (int) )) - -/* - * Default segment register values. - */ - -#define USER_CODE_SELECTOR 0x0017 -#define USER_DATA_SELECTOR 0x001f -#define KERN_CODE_SELECTOR 0x0008 -#define KERN_DATA_SELECTOR 0x0010 -/* - * Thread floating point state - * includes FPU environment as - * well as the register stack. - */ - -#define i386_THREAD_FPSTATE -2 +struct x86_sframe_compat32 { + struct x86_64_intr_stack_frame slf; + uint32_t pad_for_16byte_alignment[2]; + struct x86_saved_state_compat32 ssf; + uint32_t empty[4]; +}; +typedef struct x86_sframe_compat32 x86_sframe_compat32_t; -typedef struct { - fp_env_t environ; - fp_stack_t stack; -} i386_thread_fpstate_t; -#define i386_THREAD_FPSTATE_COUNT ((mach_msg_type_number_t) \ - ( sizeof (i386_thread_fpstate_t) / sizeof (int) )) /* - * Extra state that may be - * useful to exception handlers. + * thread state format for task running in 64bit long mode + * in long mode, the same hardware frame is always pushed regardless + * of whether there was a change in privilege level... 
therefore, there + * is no need for an x86_saved_state64_from_kernel variant */ -#define i386_THREAD_EXCEPTSTATE -3 +struct x86_saved_state64 { + /* + * saved state organized to reflect the + * system call ABI register convention + * so that we can just pass a pointer + * to the saved state when calling through + * to the actual system call functions + * the ABI limits us to 6 args passed in + * registers... I've added v_arg6 - v_arg8 + * to accommodate our most 'greedy' system + * calls (both BSD and MACH)... the individual + * system call handlers will fill these in + * via copyin if needed... + */ + uint64_t rdi; /* arg0 for system call */ + uint64_t rsi; + uint64_t rdx; + uint64_t r10; + uint64_t r8; + uint64_t r9; /* arg5 for system call */ + uint64_t v_arg6; + uint64_t v_arg7; + uint64_t v_arg8; + + uint64_t cr2; + uint64_t r15; + uint64_t r14; + uint64_t r13; + uint64_t r12; + uint64_t r11; + uint64_t rbp; + uint64_t rbx; + uint64_t rcx; + uint64_t rax; + + uint32_t gs; + uint32_t fs; + struct x86_64_intr_stack_frame isf; +}; +typedef struct x86_saved_state64 x86_saved_state64_t; +#define x86_SAVED_STATE64_COUNT ((mach_msg_type_number_t) \ + (sizeof (struct x86_saved_state64)/sizeof(unsigned int))) -typedef struct { - unsigned int trapno; - err_code_t err; -} i386_thread_exceptstate_t; +struct x86_saved_state64_tagged { + uint32_t tag; + x86_saved_state64_t state; +}; +typedef struct x86_saved_state64_tagged x86_saved_state64_tagged_t; -#define i386_THREAD_EXCEPTSTATE_COUNT ((mach_msg_type_number_t) \ - ( sizeof (i386_thread_exceptstate_t) / sizeof (int) )) +struct x86_sframe64 { + struct x86_64_intr_stack_frame slf; + uint32_t pad_for_16byte_alignment[3]; + struct x86_saved_state64_tagged ssf; +}; +typedef struct x86_sframe64 x86_sframe64_t; +extern uint32_t get_eflags_exportmask(void); /* - * Per-thread variable used - * to store 'self' id for cthreads. 
+ * Unified, tagged saved state: */ - -#define i386_THREAD_CTHREADSTATE -4 - typedef struct { - unsigned int self; -} i386_thread_cthreadstate_t; - -#define i386_THREAD_CTHREADSTATE_COUNT ((mach_msg_type_number_t) \ - ( sizeof (i386_thread_cthreadstate_t) / sizeof (int) )) + uint32_t flavor; + union { + x86_saved_state32_t ss_32; + x86_saved_state64_t ss_64; + } uss; +} x86_saved_state_t; +#define ss_32 uss.ss_32 +#define ss_64 uss.ss_64 + +static inline boolean_t +is_saved_state64(x86_saved_state_t *iss) +{ + return (iss->flavor == x86_SAVED_STATE64); +} + +static inline boolean_t +is_saved_state32(x86_saved_state_t *iss) +{ + return (iss->flavor == x86_SAVED_STATE32); +} + +static inline x86_saved_state32_t * +saved_state32(x86_saved_state_t *iss) +{ + return &iss->ss_32; +} + +static inline x86_saved_state64_t * +saved_state64(x86_saved_state_t *iss) +{ + return &iss->ss_64; +} + +#endif /* XNU_KERNEL_PRIVATE */ #endif /* _MACH_I386_THREAD_STATUS_H_ */ diff --git a/osfmk/mach/i386/vm_param.h b/osfmk/mach/i386/vm_param.h index abc489418..4d1a7767f 100644 --- a/osfmk/mach/i386/vm_param.h +++ b/osfmk/mach/i386/vm_param.h @@ -112,7 +112,27 @@ ~(I386_PGBYTES-1)) #define i386_trunc_page(x) (((pmap_paddr_t)(x)) & ~(I386_PGBYTES-1)) -#define VM_MAX_PAGE_ADDRESS 0x00000000C0000000ULL + + +#define VM_MIN_ADDRESS64 ((user_addr_t) 0x0000000000000000ULL) +/* + * default top of user stack... it grows down from here + */ +#define VM_USRSTACK64 ((user_addr_t) 0x00007FFF5FC00000ULL) +#define VM_DYLD64 ((user_addr_t) 0x00007FFF5FC00000ULL) +#define VM_LIB64_SHR_DATA ((user_addr_t) 0x00007FFF60000000ULL) +#define VM_LIB64_SHR_TEXT ((user_addr_t) 0x00007FFF80000000ULL) +/* + * the end of the usable user address space , for now about 47 bits. 
+ * the 64 bit commpage is past the end of this + */ +#define VM_MAX_PAGE_ADDRESS ((user_addr_t) 0x00007FFFFFE00000ULL) +/* + * canonical end of user address space for limits checking + */ +#define VM_MAX_USER_PAGE_ADDRESS ((user_addr_t)0x00007FFFFFFFF000ULL) + + /* system-wide values */ #define MACH_VM_MIN_ADDRESS ((mach_vm_offset_t) 0) @@ -120,15 +140,30 @@ /* process-relative values (all 32-bit legacy only for now) */ #define VM_MIN_ADDRESS ((vm_offset_t) 0) -#define VM_MAX_ADDRESS ((vm_offset_t) (VM_MAX_PAGE_ADDRESS & 0xFFFFFFFF)) +#define VM_USRSTACK32 ((vm_offset_t) 0xC0000000) +#define VM_MAX_ADDRESS ((vm_offset_t) 0xFFE00000) + + + #ifdef KERNEL_PRIVATE /* Kernel-wide values */ -#define VM_MIN_KERNEL_ADDRESS ((vm_offset_t) 0xC0000000U) -#define VM_MAX_KERNEL_ADDRESS ((vm_offset_t) 0xFfffffffU) +#define VM_MIN_KERNEL_ADDRESS ((vm_offset_t) 0x00001000U) +/* + * XXX + * The kernel max VM address is limited to 0xFF3FFFFF for now because + * some data structures are explicitly allocated at 0xFF400000 without + * VM's knowledge (see osfmk/i386/locore.s for the allocation of PTmap and co.). + * We can't let VM allocate memory from there. + */ + +#define VM_MAX_KERNEL_ADDRESS ((vm_offset_t) 0xFE7FFFFF) #define KERNEL_STACK_SIZE (I386_PGBYTES*4) +#define VM_MAP_MIN_ADDRESS MACH_VM_MIN_ADDRESS +#define VM_MAP_MAX_ADDRESS MACH_VM_MAX_ADDRESS + /* FIXME - always leave like this? 
*/ #define INTSTACK_SIZE (I386_PGBYTES*4) @@ -139,11 +174,13 @@ #define VM32_MIN_ADDRESS ((vm32_offset_t) 0) #define VM32_MAX_ADDRESS ((vm32_offset_t) (VM_MAX_PAGE_ADDRESS & 0xFFFFFFFF)) -#define LINEAR_KERNEL_ADDRESS ((vm_offset_t) 0xc0000000) +#define LINEAR_KERNEL_ADDRESS ((vm_offset_t) 0x00000000) -#define VM_MIN_KERNEL_LOADED_ADDRESS ((vm_offset_t) 0x0c000000U) +#define VM_MIN_KERNEL_LOADED_ADDRESS ((vm_offset_t) 0x00000000U) #define VM_MAX_KERNEL_LOADED_ADDRESS ((vm_offset_t) 0x1fffffffU) +#define NCOPY_WINDOWS 4 + /* * Conversion between 80386 pages and VM pages */ @@ -175,6 +212,9 @@ ); \ MACRO_END +#define IS_USERADDR64_CANONICAL(addr) \ + ((addr) < (VM_MAX_USER_PAGE_ADDRESS + PAGE_SIZE)) + #endif /* MACH_KERNEL_PRIVATE */ #endif /* KERNEL_PRIVATE */ diff --git a/osfmk/mach/i386/vm_types.h b/osfmk/mach/i386/vm_types.h index f432e35a6..afdf682c9 100644 --- a/osfmk/mach/i386/vm_types.h +++ b/osfmk/mach/i386/vm_types.h @@ -74,9 +74,9 @@ * * They also had an implicit "same size as pointer" characteristic * to them (i.e. Mach's traditional types are very ILP32 or ILP64 - * centric). We will likely support x86 ABIs that do not follow - * either ofthese models (specifically LP64). Therefore, we had to - * make a choice between making these types scale with pointers or stay + * centric). We support x86 ABIs that do not follow either of + * these models (specifically LP64). Therefore, we had to make a + * choice between making these types scale with pointers or stay * tied to integers. Because their use is predominantly tied to * to the size of an integer, we are keeping that association and * breaking free from pointer size guarantees. @@ -90,14 +90,22 @@ typedef int integer_t; * A vm_offset_t is a type-neutral pointer, * e.g. an offset into a virtual memory space. */ +#ifdef __LP64__ +typedef uintptr_t vm_offset_t; +#else /* __LP64__ */ typedef natural_t vm_offset_t; +#endif /* __LP64__ */ /* * A vm_size_t is the proper type for e.g. 
* expressing the difference between two * vm_offset_t entities. */ +#ifdef __LP64__ +typedef uintptr_t vm_size_t; +#else /* __LP64__ */ typedef natural_t vm_size_t; +#endif /* __LP64__ */ /* * This new type is independent of a particular vm map's @@ -106,23 +114,19 @@ typedef natural_t vm_size_t; * where the size of the map is not known - or we don't * want to have to distinguish. */ -typedef uint32_t mach_vm_address_t; -typedef uint32_t mach_vm_offset_t; -typedef uint32_t mach_vm_size_t; +typedef uint64_t mach_vm_address_t; +typedef uint64_t mach_vm_offset_t; +typedef uint64_t mach_vm_size_t; /* LP64todo - convert these over for good */ -#if 0 +#if 1 typedef uint64_t vm_map_offset_t; typedef uint64_t vm_map_address_t; typedef uint64_t vm_map_size_t; -#define VM_MAP_MIN_ADDRESS MACH_VM_MIN_ADDRESS -#define VM_MAP_MAX_ADDRESS MACH_VM_MAX_ADDRESS #else typedef uint32_t vm_map_offset_t; typedef uint32_t vm_map_address_t; typedef uint32_t vm_map_size_t; -#define VM_MAP_MIN_ADDRESS VM_MIN_ADDRESS -#define VM_MAP_MAX_ADDRESS VM_MAX_ADDRESS #endif #ifdef MACH_KERNEL_PRIVATE diff --git a/osfmk/mach/kmod.h b/osfmk/mach/kmod.h index 7b75bb452..f63827898 100644 --- a/osfmk/mach/kmod.h +++ b/osfmk/mach/kmod.h @@ -43,9 +43,7 @@ typedef void* kmod_args_t; #define KMOD_MAX_NAME 64 -#if __DARWIN_ALIGN_POWER -#pragma options align=power -#endif +#pragma pack(4) /* LP64todo - not 64-bit safe */ typedef struct kmod_reference { @@ -53,9 +51,7 @@ typedef struct kmod_reference { struct kmod_info *info; } kmod_reference_t; -#if __DARWIN_ALIGN_POWER -#pragma options align=reset -#endif +#pragma pack() /**************************************************************************************/ /* warning any changes to this structure affect the following macros. 
*/ @@ -67,9 +63,7 @@ typedef struct kmod_reference { typedef kern_return_t kmod_start_func_t(struct kmod_info *ki, void *data); typedef kern_return_t kmod_stop_func_t(struct kmod_info *ki, void *data); -#if __DARWIN_ALIGN_POWER -#pragma options align=power -#endif +#pragma pack(4) /* LP64todo - not 64-bit safe */ @@ -88,9 +82,7 @@ typedef struct kmod_info { kmod_stop_func_t *stop; } kmod_info_t; -#if __DARWIN_ALIGN_POWER -#pragma options align=reset -#endif +#pragma pack() typedef kmod_info_t *kmod_info_array_t; diff --git a/osfmk/mach/mach_traps.h b/osfmk/mach/mach_traps.h index 660b641ef..17514527b 100644 --- a/osfmk/mach/mach_traps.h +++ b/osfmk/mach/mach_traps.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -196,6 +196,11 @@ extern kern_return_t task_for_pid( int pid, mach_port_name_t *t); +extern kern_return_t task_name_for_pid( + mach_port_name_t target_tport, + int pid, + mach_port_name_t *tn); + extern kern_return_t pid_for_task( mach_port_name_t t, int *x); @@ -217,13 +222,8 @@ extern kern_return_t map_fd( #ifdef XNU_KERNEL_PRIVATE /* Syscall data translations routines */ -#ifdef __ppc__ #define PAD_(t) (sizeof(uint64_t) <= sizeof(t) \ ? 0 : sizeof(uint64_t) - sizeof(t)) -#else -#define PAD_(t) (sizeof(register_t) <= sizeof(t) \ - ? 
0 : sizeof(register_t) - sizeof(t)) -#endif #if BYTE_ORDER == LITTLE_ENDIAN #define PADL_(t) 0 @@ -406,6 +406,14 @@ struct task_for_pid_args { extern kern_return_t task_for_pid( struct task_for_pid_args *args); +struct task_name_for_pid_args { + PAD_ARG_(mach_port_name_t, target_tport); + PAD_ARG_(int, pid); + PAD_ARG_(user_addr_t, t); +}; +extern kern_return_t task_name_for_pid( + struct task_name_for_pid_args *args); + struct pid_for_task_args { PAD_ARG_(mach_port_name_t, t); PAD_ARG_(user_addr_t, pid); @@ -519,17 +527,6 @@ struct mk_timer_cancel_trap_args { extern kern_return_t mk_timer_cancel_trap( struct mk_timer_cancel_trap_args *args); -/* no user-level prototype for this one */ -struct mk_timebase_info_trap_args { - PAD_ARG_(uint32_t *, delta); - PAD_ARG_(uint32_t *, abs_to_ns_numer); - PAD_ARG_(uint32_t *, abs_to_ns_denom); - PAD_ARG_(uint32_t *, proc_to_abs_numer); - PAD_ARG_(uint32_t *, proc_to_abs_denom); -}; -extern void mk_timebase_info_trap( - struct mk_timebase_info_trap_args *args); - /* not published to LP64 clients yet */ struct iokit_user_client_trap_args { PAD_ARG_(void *, userClientRef); diff --git a/osfmk/mach/mach_types.defs b/osfmk/mach/mach_types.defs index 4613dea3f..dffdb8e2a 100644 --- a/osfmk/mach/mach_types.defs +++ b/osfmk/mach/mach_types.defs @@ -98,6 +98,14 @@ type task_t = mach_port_t #endif /* KERNEL_SERVER */ ; +type task_name_t = mach_port_t +#if KERNEL_SERVER + intran: task_name_t convert_port_to_task_name(mach_port_t) + outtran: mach_port_t convert_task_name_to_port(task_name_t) + destructor: task_name_deallocate(task_name_t) +#endif /* KERNEL_SERVER */ + ; + type thread_t = mach_port_t #if KERNEL_SERVER intran: thread_t convert_port_to_thread(mach_port_t) diff --git a/osfmk/mach/mach_types.h b/osfmk/mach/mach_types.h index 486d3be41..e75e24303 100644 --- a/osfmk/mach/mach_types.h +++ b/osfmk/mach/mach_types.h @@ -103,7 +103,7 @@ * If we are in the kernel, then pick up the kernel definitions for * the basic mach types. 
*/ -typedef struct task *task_t; +typedef struct task *task_t, *task_name_t; typedef struct thread *thread_t, *thread_act_t; typedef struct ipc_space *ipc_space_t; typedef struct host *host_t; @@ -145,6 +145,7 @@ __END_DECLS * ports at user-space. */ typedef mach_port_t task_t; +typedef mach_port_t task_name_t; typedef mach_port_t thread_t; typedef mach_port_t thread_act_t; typedef mach_port_t ipc_space_t; @@ -230,6 +231,7 @@ typedef exception_handler_array_t exception_port_arrary_t; #define TASK_NULL ((task_t) 0) +#define TASK_NAME_NULL ((task_name_t) 0) #define THREAD_NULL ((thread_t) 0) #define THR_ACT_NULL ((thread_act_t) 0) #define IPC_SPACE_NULL ((ipc_space_t) 0) diff --git a/osfmk/mach/machine.h b/osfmk/mach/machine.h index 130a23ce7..7c88e5836 100644 --- a/osfmk/mach/machine.h +++ b/osfmk/mach/machine.h @@ -134,7 +134,10 @@ __END_DECLS /* skip ((cpu_type_t) 4) */ /* skip ((cpu_type_t) 5) */ #define CPU_TYPE_MC680x0 ((cpu_type_t) 6) -#define CPU_TYPE_I386 ((cpu_type_t) 7) +#define CPU_TYPE_X86 ((cpu_type_t) 7) +#define CPU_TYPE_I386 CPU_TYPE_X86 /* compatibility */ +#define CPU_TYPE_X86_64 (CPU_TYPE_X86 | CPU_ARCH_ABI64) + /* skip CPU_TYPE_MIPS ((cpu_type_t) 8) */ /* skip ((cpu_type_t) 9) */ #define CPU_TYPE_MC98000 ((cpu_type_t) 10) @@ -217,19 +220,32 @@ __END_DECLS #define CPU_SUBTYPE_MC68030_ONLY ((cpu_subtype_t) 3) /* - * I386 subtypes. 
+ * I386 subtypes */ -#define CPU_SUBTYPE_I386_ALL ((cpu_subtype_t) 3) -#define CPU_SUBTYPE_386 ((cpu_subtype_t) 3) -#define CPU_SUBTYPE_486 ((cpu_subtype_t) 4) -#define CPU_SUBTYPE_486SX ((cpu_subtype_t) 4 + 128) -#define CPU_SUBTYPE_586 ((cpu_subtype_t) 5) #define CPU_SUBTYPE_INTEL(f, m) ((cpu_subtype_t) (f) + ((m) << 4)) + +#define CPU_SUBTYPE_I386_ALL CPU_SUBTYPE_INTEL(3, 0) +#define CPU_SUBTYPE_386 CPU_SUBTYPE_INTEL(3, 0) +#define CPU_SUBTYPE_486 CPU_SUBTYPE_INTEL(4, 0) +#define CPU_SUBTYPE_486SX CPU_SUBTYPE_INTEL(4, 8) // 8 << 4 = 128 +#define CPU_SUBTYPE_586 CPU_SUBTYPE_INTEL(5, 0) #define CPU_SUBTYPE_PENT CPU_SUBTYPE_INTEL(5, 0) #define CPU_SUBTYPE_PENTPRO CPU_SUBTYPE_INTEL(6, 1) #define CPU_SUBTYPE_PENTII_M3 CPU_SUBTYPE_INTEL(6, 3) #define CPU_SUBTYPE_PENTII_M5 CPU_SUBTYPE_INTEL(6, 5) +#define CPU_SUBTYPE_CELERON CPU_SUBTYPE_INTEL(7, 6) +#define CPU_SUBTYPE_CELERON_MOBILE CPU_SUBTYPE_INTEL(7, 7) +#define CPU_SUBTYPE_PENTIUM_3 CPU_SUBTYPE_INTEL(8, 0) +#define CPU_SUBTYPE_PENTIUM_3_M CPU_SUBTYPE_INTEL(8, 1) +#define CPU_SUBTYPE_PENTIUM_3_XEON CPU_SUBTYPE_INTEL(8, 2) +#define CPU_SUBTYPE_PENTIUM_M CPU_SUBTYPE_INTEL(9, 0) +#define CPU_SUBTYPE_PENTIUM_4 CPU_SUBTYPE_INTEL(10, 0) +#define CPU_SUBTYPE_PENTIUM_4_M CPU_SUBTYPE_INTEL(10, 1) +#define CPU_SUBTYPE_ITANIUM CPU_SUBTYPE_INTEL(11, 0) +#define CPU_SUBTYPE_ITANIUM_2 CPU_SUBTYPE_INTEL(11, 1) +#define CPU_SUBTYPE_XEON CPU_SUBTYPE_INTEL(12, 0) +#define CPU_SUBTYPE_XEON_MP CPU_SUBTYPE_INTEL(12, 1) #define CPU_SUBTYPE_INTEL_FAMILY(x) ((x) & 15) #define CPU_SUBTYPE_INTEL_FAMILY_MAX 15 @@ -237,6 +253,14 @@ __END_DECLS #define CPU_SUBTYPE_INTEL_MODEL(x) ((x) >> 4) #define CPU_SUBTYPE_INTEL_MODEL_ALL 0 +/* + * X86 subtypes. 
+ */ + +#define CPU_SUBTYPE_X86_ALL ((cpu_subtype_t)3) +#define CPU_SUBTYPE_X86_64_ALL ((cpu_subtype_t)3) +#define CPU_SUBTYPE_X86_ARCH1 ((cpu_subtype_t)4) + #define CPU_THREADTYPE_INTEL_HTT ((cpu_threadtype_t) 1) @@ -303,4 +327,20 @@ __END_DECLS #define CPU_SUBTYPE_POWERPC_7450 ((cpu_subtype_t) 11) #define CPU_SUBTYPE_POWERPC_970 ((cpu_subtype_t) 100) +/* + * CPU families (sysctl hw.cpufamily) + * + * NB: the encodings of the CPU families are intentionally arbitrary. + * There is no ordering, and you should never try to deduce whether + * or not some feature is available based on the family. + * Use feature flags (eg, hw.optional.altivec) to test for optional + * functionality. + */ +#define CPUFAMILY_UNKNOWN 0 +#define CPUFAMILY_POWERPC_G3 0xcee41549 +#define CPUFAMILY_POWERPC_G4 0x77c184ae +#define CPUFAMILY_POWERPC_G5 0xed76d8aa +#define CPUFAMILY_INTEL_6_14 0x73d67300 /* Intel Core Solo and Intel Core Duo (32-bit Pentium-M with SSE3) */ +#define CPUFAMILY_INTEL_6_15 0x426f69ef /* Intel Core 2 */ + #endif /* _MACH_MACHINE_H_ */ diff --git a/osfmk/mach/machine/asm.h b/osfmk/mach/machine/asm.h index d4a8aee92..76c330464 100644 --- a/osfmk/mach/machine/asm.h +++ b/osfmk/mach/machine/asm.h @@ -25,7 +25,7 @@ #if defined (__ppc__) || defined (__ppc64__) #include "mach/ppc/asm.h" -#elif defined (__i386__) +#elif defined (__i386__) || defined(__x86_64__) #include "mach/i386/asm.h" #else #error architecture not supported diff --git a/osfmk/mach/machine/boolean.h b/osfmk/mach/machine/boolean.h index 6a3f99afe..3d8aba6a1 100644 --- a/osfmk/mach/machine/boolean.h +++ b/osfmk/mach/machine/boolean.h @@ -25,7 +25,7 @@ #if defined (__ppc__) || defined (__ppc64__) #include "mach/ppc/boolean.h" -#elif defined (__i386__) +#elif defined (__i386__) || defined(__x86_64__) #include "mach/i386/boolean.h" #else #error architecture not supported diff --git a/osfmk/mach/machine/exception.h b/osfmk/mach/machine/exception.h index 3640e2b18..a1f3270e1 100644 --- 
a/osfmk/mach/machine/exception.h +++ b/osfmk/mach/machine/exception.h @@ -25,7 +25,7 @@ #if defined (__ppc__) || defined (__ppc64__) #include "mach/ppc/exception.h" -#elif defined (__i386__) +#elif defined (__i386__) || defined(__x86_64__) #include "mach/i386/exception.h" #else #error architecture not supported diff --git a/osfmk/mach/machine/kern_return.h b/osfmk/mach/machine/kern_return.h index 8f3366f99..f5db5df57 100644 --- a/osfmk/mach/machine/kern_return.h +++ b/osfmk/mach/machine/kern_return.h @@ -25,7 +25,7 @@ #if defined (__ppc__) || defined (__ppc64__) #include "mach/ppc/kern_return.h" -#elif defined (__i386__) +#elif defined (__i386__) || defined(__x86_64__) #include "mach/i386/kern_return.h" #else #error architecture not supported diff --git a/osfmk/mach/machine/machine_types.defs b/osfmk/mach/machine/machine_types.defs index 459ff4b3a..b75229ee8 100644 --- a/osfmk/mach/machine/machine_types.defs +++ b/osfmk/mach/machine/machine_types.defs @@ -25,7 +25,7 @@ #if defined (__ppc__) || defined (__ppc64__) #include "mach/ppc/machine_types.defs" -#elif defined (__i386__) +#elif defined (__i386__) || defined(__x86_64__) #include "mach/i386/machine_types.defs" #else #error architecture not supported diff --git a/osfmk/mach/machine/ndr_def.h b/osfmk/mach/machine/ndr_def.h index f96f182b9..160c75027 100644 --- a/osfmk/mach/machine/ndr_def.h +++ b/osfmk/mach/machine/ndr_def.h @@ -25,7 +25,7 @@ #if defined (__ppc__) || defined (__ppc64__) #include "mach/ppc/ndr_def.h" -#elif defined (__i386__) +#elif defined (__i386__) || defined(__x86_64__) #include "mach/i386/ndr_def.h" #else #error architecture not supported diff --git a/osfmk/mach/machine/processor_info.h b/osfmk/mach/machine/processor_info.h index f8944a03f..0ca7074e3 100644 --- a/osfmk/mach/machine/processor_info.h +++ b/osfmk/mach/machine/processor_info.h @@ -25,7 +25,7 @@ #if defined (__ppc__) || defined (__ppc64__) #include "mach/ppc/processor_info.h" -#elif defined (__i386__) +#elif defined (__i386__) || 
defined(__x86_64__) #include "mach/i386/processor_info.h" #else #error architecture not supported diff --git a/osfmk/mach/machine/rpc.h b/osfmk/mach/machine/rpc.h index 62cacfd7b..0bdf02379 100644 --- a/osfmk/mach/machine/rpc.h +++ b/osfmk/mach/machine/rpc.h @@ -25,7 +25,7 @@ #if defined (__ppc__) || defined (__ppc64__) #include "mach/ppc/rpc.h" -#elif defined (__i386__) +#elif defined (__i386__) || defined(__x86_64__) #include "mach/i386/rpc.h" #else #error architecture not supported diff --git a/osfmk/mach/machine/syscall_sw.h b/osfmk/mach/machine/syscall_sw.h index e20bf846e..2626d4c7a 100644 --- a/osfmk/mach/machine/syscall_sw.h +++ b/osfmk/mach/machine/syscall_sw.h @@ -27,7 +27,7 @@ #if defined (__ppc__) || defined (__ppc64__) #include "mach/ppc/syscall_sw.h" -#elif defined (__i386__) +#elif defined (__i386__) || defined(__x86_64__) #include "mach/i386/syscall_sw.h" #else #error architecture not supported diff --git a/osfmk/mach/machine/thread_state.h b/osfmk/mach/machine/thread_state.h index 9601fb728..da012bcb1 100644 --- a/osfmk/mach/machine/thread_state.h +++ b/osfmk/mach/machine/thread_state.h @@ -25,7 +25,7 @@ #if defined (__ppc__) || defined(__ppc64__) #include "mach/ppc/thread_state.h" -#elif defined (__i386__) +#elif defined (__i386__) || defined(__x86_64__) #include "mach/i386/thread_state.h" #else #error architecture not supported diff --git a/osfmk/mach/machine/thread_status.h b/osfmk/mach/machine/thread_status.h index e78affe07..e13af4f42 100644 --- a/osfmk/mach/machine/thread_status.h +++ b/osfmk/mach/machine/thread_status.h @@ -25,7 +25,7 @@ #if defined (__ppc__) || defined (__ppc64__) #include "mach/ppc/thread_status.h" -#elif defined (__i386__) +#elif defined (__i386__) || defined(__x86_64__) #include "mach/i386/thread_status.h" #else #error architecture not supported diff --git a/osfmk/mach/machine/vm_param.h b/osfmk/mach/machine/vm_param.h index 8fb0957c5..936fb3772 100644 --- a/osfmk/mach/machine/vm_param.h +++ 
b/osfmk/mach/machine/vm_param.h @@ -25,7 +25,7 @@ #if defined (__ppc__) || defined (__ppc64__) #include "mach/ppc/vm_param.h" -#elif defined (__i386__) +#elif defined (__i386__) || defined(__x86_64__) #include "mach/i386/vm_param.h" #else #error architecture not supported diff --git a/osfmk/mach/machine/vm_types.h b/osfmk/mach/machine/vm_types.h index 77a91ca50..31621f173 100644 --- a/osfmk/mach/machine/vm_types.h +++ b/osfmk/mach/machine/vm_types.h @@ -25,7 +25,7 @@ #if defined (__ppc__) || defined(__ppc64__) #include "mach/ppc/vm_types.h" -#elif defined (__i386__) +#elif defined (__i386__) || defined(__x86_64__) #include "mach/i386/vm_types.h" #else #error architecture not supported diff --git a/osfmk/mach/memory_object_types.h b/osfmk/mach/memory_object_types.h index 46c32d901..1b6254a05 100644 --- a/osfmk/mach/memory_object_types.h +++ b/osfmk/mach/memory_object_types.h @@ -66,6 +66,8 @@ #include #include +#include +#include #include #include @@ -75,6 +77,8 @@ typedef unsigned long long memory_object_offset_t; typedef unsigned long long memory_object_size_t; +typedef natural_t memory_object_cluster_size_t; + /* * Temporary until real EMMI version gets re-implemented @@ -82,14 +86,61 @@ typedef unsigned long long memory_object_size_t; #ifdef KERNEL_PRIVATE +struct memory_object_pager_ops; /* forward declaration */ + typedef struct memory_object { - int *pager; + const struct memory_object_pager_ops *mo_pager_ops; } *memory_object_t; typedef struct memory_object_control { - struct vm_object *object; + struct vm_object *moc_object; + unsigned int moc_ikot; /* XXX fake ip_kotype */ } *memory_object_control_t; +typedef const struct memory_object_pager_ops { + void (*memory_object_reference)( + memory_object_t mem_obj); + void (*memory_object_deallocate)( + memory_object_t mem_obj); + kern_return_t (*memory_object_init)( + memory_object_t mem_obj, + memory_object_control_t mem_control, + memory_object_cluster_size_t size); + kern_return_t (*memory_object_terminate)( 
+ memory_object_t mem_obj); + kern_return_t (*memory_object_data_request)( + memory_object_t mem_obj, + memory_object_offset_t offset, + memory_object_cluster_size_t length, + vm_prot_t desired_access); + kern_return_t (*memory_object_data_return)( + memory_object_t mem_obj, + memory_object_offset_t offset, + vm_size_t size, + memory_object_offset_t *resid_offset, + int *io_error, + boolean_t dirty, + boolean_t kernel_copy, + int upl_flags); + kern_return_t (*memory_object_data_initialize)( + memory_object_t mem_obj, + memory_object_offset_t offset, + vm_size_t size); + kern_return_t (*memory_object_data_unlock)( + memory_object_t mem_obj, + memory_object_offset_t offset, + vm_size_t size, + vm_prot_t desired_access); + kern_return_t (*memory_object_synchronize)( + memory_object_t mem_obj, + memory_object_offset_t offset, + vm_size_t size, + vm_sync_t sync_flags); + kern_return_t (*memory_object_unmap)( + memory_object_t mem_obj); + const char *memory_object_pager_name; +} * memory_object_pager_ops_t; + #else /* KERNEL_PRIVATE */ typedef mach_port_t memory_object_t; @@ -234,8 +285,6 @@ __END_DECLS #endif /* PRIVATE */ -typedef natural_t memory_object_cluster_size_t; - struct memory_object_perf_info { memory_object_cluster_size_t cluster_size; boolean_t may_cache; @@ -385,12 +434,14 @@ typedef uint32_t upl_size_t; /* page-aligned byte size */ #define UPL_NOZEROFILL 0x00400000 #define UPL_WILL_MODIFY 0x00800000 /* caller will modify the pages */ +#define UPL_NEED_32BIT_ADDR 0x01000000 + /* UPL flags known by this kernel */ -#define UPL_VALID_FLAGS 0x00FFFFFF +#define UPL_VALID_FLAGS 0x01FFFFFF /* upl abort error flags */ -#define UPL_ABORT_RESTART 0x1 +#define UPL_ABORT_RESTART 0x1 #define UPL_ABORT_UNAVAILABLE 0x2 #define UPL_ABORT_ERROR 0x4 #define UPL_ABORT_FREE_ON_EMPTY 0x8 /* only implemented in wrappers */ @@ -550,8 +601,7 @@ extern vm_size_t upl_get_internal_pagelist_offset(void); __BEGIN_DECLS extern ppnum_t upl_phys_page(upl_page_info_t *upl, int index); 
-extern void upl_set_dirty(upl_t upl); -extern void upl_clear_dirty(upl_t upl); +extern void upl_clear_dirty(upl_t upl, boolean_t value); __END_DECLS diff --git a/osfmk/mach/mig.h b/osfmk/mach/mig.h index ad6cc4aa2..bdb9ce2eb 100644 --- a/osfmk/mach/mig.h +++ b/osfmk/mach/mig.h @@ -52,13 +52,11 @@ #endif /* defined(TypeCheck) */ /* - * Pack MIG message structs if we have Power alignment of structs. + * Pack MIG message structs. * This is an indicator of the need to view shared structs in a * binary-compatible format - and MIG message structs are no different. */ -#if __DARWIN_ALIGN_POWER #define __MigPackStructs 1 -#endif /* * Definition for MIG-generated server stub routines. These routines diff --git a/osfmk/mach/port.h b/osfmk/mach/port.h index b54438dc3..8803a516c 100644 --- a/osfmk/mach/port.h +++ b/osfmk/mach/port.h @@ -93,7 +93,7 @@ typedef natural_t mach_port_name_t; typedef mach_port_name_t *mach_port_name_array_t; -#ifdef KERNEL_PRIVATE +#ifdef KERNEL /* * mach_port_t - a named port right @@ -134,7 +134,7 @@ typedef struct ipc_port *ipc_port_t; typedef ipc_port_t mach_port_t; -#else /* KERNEL_PRIVATE */ +#else /* KERNEL */ /* * mach_port_t - a named port right @@ -157,7 +157,7 @@ typedef ipc_port_t mach_port_t; typedef mach_port_name_t mach_port_t; #endif -#endif /* KERNEL_PRIVATE */ +#endif /* KERNEL */ typedef mach_port_t *mach_port_array_t; diff --git a/osfmk/mach/ppc/rpc.h b/osfmk/mach/ppc/rpc.h index 0380563c4..a5b6e1752 100644 --- a/osfmk/mach/ppc/rpc.h +++ b/osfmk/mach/ppc/rpc.h @@ -26,4 +26,4 @@ #ifndef _MACH_PPC_RPC_H_ #define _MACH_PPC_RPC_H_ -#endif _MACH_PPC_RPC_H_ +#endif /* _MACH_PPC_RPC_H_ */ diff --git a/osfmk/mach/ppc/syscall_sw.h b/osfmk/mach/ppc/syscall_sw.h index b1aedddae..9e4c01ab8 100644 --- a/osfmk/mach/ppc/syscall_sw.h +++ b/osfmk/mach/ppc/syscall_sw.h @@ -65,7 +65,7 @@ ppc_trap(CHUDCall,0x6009) ppc_trap(ppcNull,0x600A) ppc_trap(perfmon_control,0x600B) ppc_trap(ppcNullinst,0x600C) -ppc_trap(pmsCntrl,0x600D) 
+ppc_trap(pmsCPUCntrl,0x600D) #endif /* _MACH_SYSCALL_SW_H_ */ #endif /* _MACH_PPC_SYSCALL_SW_H_ */ diff --git a/osfmk/mach/syscall_sw.h b/osfmk/mach/syscall_sw.h index d6c2f5589..a3c028995 100644 --- a/osfmk/mach/syscall_sw.h +++ b/osfmk/mach/syscall_sw.h @@ -90,6 +90,7 @@ kernel_trap(init_process,-41,0) kernel_trap(map_fd,-43,5) #endif /* __LP64__ */ +kernel_trap(task_name_for_pid,-44,3) kernel_trap(task_for_pid,-45,3) kernel_trap(pid_for_task,-46,2) diff --git a/osfmk/mach/task.defs b/osfmk/mach/task.defs index d04efd4bd..f1743eb1a 100644 --- a/osfmk/mach/task.defs +++ b/osfmk/mach/task.defs @@ -111,7 +111,7 @@ routine mach_ports_lookup( * Returns information about the target task. */ routine task_info( - target_task : task_t; + target_task : task_name_t; flavor : task_flavor_t; out task_info_out : task_info_t, CountInOut); diff --git a/osfmk/mach/task_info.h b/osfmk/mach/task_info.h index 05e4db68c..398892373 100644 --- a/osfmk/mach/task_info.h +++ b/osfmk/mach/task_info.h @@ -81,9 +81,7 @@ typedef integer_t task_info_data_t[TASK_INFO_MAX]; * Currently defined information structures. */ -#if __DARWIN_ALIGN_POWER -#pragma options align=power -#endif +#pragma pack(4) #define TASK_BASIC_INFO_32 4 /* basic information */ @@ -200,9 +198,7 @@ typedef struct task_absolutetime_info *task_absolutetime_info_t; #define TASK_AUDIT_TOKEN_COUNT \ (sizeof(audit_token_t) / sizeof(natural_t)) -#if __DARWIN_ALIGN_POWER -#pragma options align=reset -#endif +#pragma pack() /* diff --git a/osfmk/mach/task_special_ports.h b/osfmk/mach/task_special_ports.h index a3d74ec7a..9596805d8 100644 --- a/osfmk/mach/task_special_ports.h +++ b/osfmk/mach/task_special_ports.h @@ -68,6 +68,8 @@ typedef int task_special_port_t; #define TASK_HOST_PORT 2 /* The host (priv) port for task. */ +#define TASK_NAME_PORT 3 /* the name (unpriv) port for task */ + #define TASK_BOOTSTRAP_PORT 4 /* Bootstrap environment for task. 
*/ /* diff --git a/osfmk/mach/thread_status.h b/osfmk/mach/thread_status.h index c806f7fc0..b350ed303 100644 --- a/osfmk/mach/thread_status.h +++ b/osfmk/mach/thread_status.h @@ -77,9 +77,10 @@ typedef natural_t *thread_state_t; /* Variable-length array */ /* THREAD_STATE_MAX is now defined in */ -typedef int thread_state_data_t[THREAD_STATE_MAX]; +typedef natural_t thread_state_data_t[THREAD_STATE_MAX]; #define THREAD_STATE_FLAVOR_LIST 0 /* List of valid flavors */ +#define THREAD_STATE_FLAVOR_LIST_NEW 128 typedef int thread_state_flavor_t; typedef thread_state_flavor_t *thread_state_flavor_array_t; diff --git a/osfmk/mach/vm_param.h b/osfmk/mach/vm_param.h index 4a2da445e..c23cf07f7 100644 --- a/osfmk/mach/vm_param.h +++ b/osfmk/mach/vm_param.h @@ -214,6 +214,9 @@ extern uint64_t mem_actual; /* 64-bit size of memory - not limited by maxmem * extern uint64_t sane_size; /* Memory size to use for defaults calculations */ extern addr64_t vm_last_addr; /* Highest kernel virtual address known to the VM system */ +extern const vm_offset_t vm_min_kernel_address; +extern const vm_offset_t vm_max_kernel_address; + #endif /* XNU_KERNEL_PRIVATE */ extern vm_size_t page_size; diff --git a/osfmk/mach/vm_prot.h b/osfmk/mach/vm_prot.h index 011ce85a7..71eaf4877 100644 --- a/osfmk/mach/vm_prot.h +++ b/osfmk/mach/vm_prot.h @@ -78,6 +78,12 @@ typedef int vm_prot_t; #define VM_PROT_WRITE ((vm_prot_t) 0x02) /* write permission */ #define VM_PROT_EXECUTE ((vm_prot_t) 0x04) /* execute permission */ +/* + * For now we can only support no execute on stacks... 
too many + * entries are marked w/o VM_PROT_EXECUTE that actually have code + */ +#define STACK_ONLY_NX 1 + /* * The default protection for newly-created virtual memory */ @@ -123,4 +129,5 @@ typedef int vm_prot_t; #define VM_PROT_WANTS_COPY ((vm_prot_t) 0x10) + #endif /* _MACH_VM_PROT_H_ */ diff --git a/osfmk/mach/vm_region.h b/osfmk/mach/vm_region.h index 3677bc25c..987ea01ae 100644 --- a/osfmk/mach/vm_region.h +++ b/osfmk/mach/vm_region.h @@ -44,9 +44,7 @@ #include -#if __DARWIN_ALIGN_POWER -#pragma options align=power -#endif +#pragma pack(4) /* * Types defined: @@ -251,8 +249,6 @@ struct vm_read_entry { typedef struct mach_vm_read_entry mach_vm_read_entry_t[VM_MAP_ENTRY_MAX]; typedef struct vm_read_entry vm_read_entry_t[VM_MAP_ENTRY_MAX]; -#if __DARWIN_ALIGN_POWER -#pragma options align=reset -#endif +#pragma pack() #endif /*_MACH_VM_REGION_H_*/ diff --git a/osfmk/mach_debug/vm_info.h b/osfmk/mach_debug/vm_info.h index aee5bb755..a02bba1f0 100644 --- a/osfmk/mach_debug/vm_info.h +++ b/osfmk/mach_debug/vm_info.h @@ -66,9 +66,7 @@ #include #include -#if __DARWIN_ALIGN_POWER -#pragma options align=power -#endif +#pragma pack(4) /* * Remember to update the mig type definitions @@ -142,8 +140,6 @@ typedef struct vm_info_object { typedef vm_info_object_t *vm_info_object_array_t; -#if __DARWIN_ALIGN_POWER -#pragma options align=reset -#endif +#pragma pack() #endif /* _MACH_DEBUG_VM_INFO_H_ */ diff --git a/osfmk/machine/cpu_capabilities.h b/osfmk/machine/cpu_capabilities.h index 65f8ea8f7..2d441d502 100644 --- a/osfmk/machine/cpu_capabilities.h +++ b/osfmk/machine/cpu_capabilities.h @@ -36,7 +36,7 @@ #else /* !KERNEL_PRIVATE -- System Framework header */ #if defined (__ppc__) || defined(__ppc64__) #include -#elif defined (__i386__) +#elif defined (__i386__) || defined(__x86_64__) #include #else #error architecture not supported diff --git a/osfmk/ppc/Diagnostics.h b/osfmk/ppc/Diagnostics.h index 7983b9bb2..ad7767ca1 100644 --- a/osfmk/ppc/Diagnostics.h +++ 
b/osfmk/ppc/Diagnostics.h @@ -29,6 +29,7 @@ /* * Here are the Diagnostic interface interfaces * Lovingly crafted by Bill Angell using traditional methods + * Keep selectors in sync with the x86 version where possible. */ #ifdef KERNEL_PRIVATE diff --git a/osfmk/ppc/Makefile b/osfmk/ppc/Makefile index 5649d653b..03891582a 100644 --- a/osfmk/ppc/Makefile +++ b/osfmk/ppc/Makefile @@ -16,11 +16,11 @@ EXPORT_ONLY_FILES = \ io_map_entries.h \ lock.h \ locks.h \ + pms.h \ proc_reg.h \ machine_routines.h \ mappings.h \ savearea.h \ - pms.h \ simple_lock.h INSTALL_MD_DIR = ppc diff --git a/osfmk/ppc/PPCcalls.h b/osfmk/ppc/PPCcalls.h index e8beeb20d..adc5badf3 100644 --- a/osfmk/ppc/PPCcalls.h +++ b/osfmk/ppc/PPCcalls.h @@ -49,7 +49,7 @@ PPCcallEnt PPCcalls[] = { PPCcall(bb_settaskenv), /* 0x6007 Set the BlueBox per thread task environment data */ PPCcall(vmm_stop_vm), /* 0x6008 Stop a running VM */ - PPCcall(dis), /* 0x6009 CHUD Interface hook */ + PPCcall(dis), /* 0x6009 disabled */ PPCcall(ppcNull), /* 0x600A Null PPC syscall */ PPCcall(perfmon_control), /* 0x600B performance monitor */ diff --git a/osfmk/ppc/ast.h b/osfmk/ppc/ast.h index 8b0e95c6d..274a8d84a 100644 --- a/osfmk/ppc/ast.h +++ b/osfmk/ppc/ast.h @@ -30,9 +30,8 @@ #ifndef _PPC_AST_H_ #define _PPC_AST_H_ -#define AST_PPC_CHUD_URGENT 0x80000000 -#define AST_PPC_CHUD 0x40000000 - -#define AST_PPC_CHUD_ALL (AST_PPC_CHUD_URGENT|AST_PPC_CHUD) +#define AST_PPC_CHUD_URGENT AST_CHUD_URGENT +#define AST_PPC_CHUD AST_CHUD +#define AST_PPC_CHUD_ALL AST_CHUD_ALL #endif /* _PPC_AST_H_ */ diff --git a/osfmk/ppc/chud/chud_xnu.h b/osfmk/ppc/chud/chud_xnu.h deleted file mode 100644 index ace3b7969..000000000 --- a/osfmk/ppc/chud/chud_xnu.h +++ /dev/null @@ -1,262 +0,0 @@ -/* - * Copyright (c) 2003 Apple Computer, Inc. All rights reserved. 
- * - * @APPLE_LICENSE_HEADER_START@ - * - * The contents of this file constitute Original Code as defined in and - * are subject to the Apple Public Source License Version 1.1 (the - * "License"). You may not use this file except in compliance with the - * License. Please obtain a copy of the License at - * http://www.apple.com/publicsource and read it before using this file. - * - * This Original Code and all software distributed under the License are - * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the - * License for the specific language governing rights and limitations - * under the License. - * - * @APPLE_LICENSE_HEADER_END@ - */ - -#ifndef _PPC_CHUD_XNU_H_ -#define _PPC_CHUD_XNU_H_ - - -#include -#include -#include - -#pragma mark **** version **** -uint32_t chudxnu_version(void); - -#pragma mark **** task **** -// ******************************************************************************** -// task -// ******************************************************************************** -int chudxnu_pid_for_task(task_t task); -task_t chudxnu_task_for_pid(int pid); -int chudxnu_current_pid(void); - -kern_return_t chudxnu_task_read(task_t task, void *kernaddr, uint64_t usraddr, vm_size_t size); -kern_return_t chudxnu_task_write(task_t task, uint64_t useraddr, void *kernaddr, vm_size_t size); -kern_return_t chudxnu_kern_read(void *destaddr, vm_offset_t srcaddr, vm_size_t size); -kern_return_t chudxnu_kern_write(vm_offset_t destaddr, void *srcaddr, vm_size_t size); - -boolean_t chudxnu_is_64bit_task(task_t task); - -#pragma mark **** thread **** -// ******************************************************************************** -// thread -// ******************************************************************************** 
-kern_return_t chudxnu_bind_thread(thread_t thread, int cpu); - -kern_return_t chudxnu_unbind_thread(thread_t thread); - -kern_return_t chudxnu_thread_get_state( thread_t thread, - thread_flavor_t flavor, - thread_state_t tstate, - mach_msg_type_number_t *count, - boolean_t user_only); - -kern_return_t chudxnu_thread_set_state( thread_t thread, - thread_flavor_t flavor, - thread_state_t tstate, - mach_msg_type_number_t count, - boolean_t user_only); - -kern_return_t chudxnu_thread_user_state_available(thread_t thread); - - -kern_return_t chudxnu_thread_get_callstack( thread_t thread, - uint32_t *callStack, - mach_msg_type_number_t *count, - boolean_t user_only); - -kern_return_t chudxnu_thread_get_callstack64(thread_t thread, - uint64_t *callStack, - mach_msg_type_number_t *count, - boolean_t user_only); - -task_t chudxnu_current_task(void); -thread_t chudxnu_current_thread(void); - -task_t chudxnu_task_for_thread(thread_t thread); - -kern_return_t chudxnu_all_tasks(task_array_t *task_list, - mach_msg_type_number_t *count); -kern_return_t chudxnu_free_task_list(task_array_t *task_list, - mach_msg_type_number_t *count); - -kern_return_t chudxnu_all_threads( thread_array_t *thread_list, - mach_msg_type_number_t *count); -kern_return_t chudxnu_task_threads( task_t task, - thread_array_t *thread_list, - mach_msg_type_number_t *count); -kern_return_t chudxnu_free_thread_list(thread_array_t *thread_list, - mach_msg_type_number_t *count); - -kern_return_t chudxnu_thread_info( thread_t thread, - thread_flavor_t flavor, - thread_info_t thread_info_out, - mach_msg_type_number_t *thread_info_count); - -kern_return_t chudxnu_thread_last_context_switch(thread_t thread, uint64_t *timestamp); - - -#pragma mark **** memory **** -// ******************************************************************************** -// memory -// ******************************************************************************** - -uint64_t chudxnu_avail_memory_size(void); -uint64_t 
chudxnu_phys_memory_size(void); - -vm_offset_t chudxnu_io_map(uint64_t phys_addr, vm_size_t size); - -uint32_t chudxnu_phys_addr_wimg(uint64_t phys_addr); - -#pragma mark **** cpu **** -// ******************************************************************************** -// cpu -// ******************************************************************************** -int chudxnu_avail_cpu_count(void); -int chudxnu_phys_cpu_count(void); -int chudxnu_cpu_number(void); - -kern_return_t chudxnu_enable_cpu(int cpu, boolean_t enable); - -kern_return_t chudxnu_enable_cpu_nap(int cpu, boolean_t enable); -boolean_t chudxnu_cpu_nap_enabled(int cpu); - -boolean_t chudxnu_get_interrupts_enabled(void); -boolean_t chudxnu_set_interrupts_enabled(boolean_t enable); -boolean_t chudxnu_at_interrupt_context(void); -void chudxnu_cause_interrupt(void); - -kern_return_t chudxnu_set_shadowed_spr(int cpu, int spr, uint32_t val); -kern_return_t chudxnu_set_shadowed_spr64(int cpu, int spr, uint64_t val); - -uint32_t chudxnu_get_orig_cpu_l2cr(int cpu); -uint32_t chudxnu_get_orig_cpu_l3cr(int cpu); - -kern_return_t chudxnu_read_spr(int cpu, int spr, uint32_t *val_p); -kern_return_t chudxnu_read_spr64(int cpu, int spr, uint64_t *val_p); -kern_return_t chudxnu_write_spr(int cpu, int spr, uint32_t val); -kern_return_t chudxnu_write_spr64(int cpu, int spr, uint64_t val); - -void chudxnu_flush_caches(void); -void chudxnu_enable_caches(boolean_t enable); - -kern_return_t chudxnu_perfmon_acquire_facility(task_t); -kern_return_t chudxnu_perfmon_release_facility(task_t); - -uint32_t * chudxnu_get_branch_trace_buffer(uint32_t *entries); - -typedef struct { - uint32_t hwResets; - uint32_t hwMachineChecks; - uint32_t hwDSIs; - uint32_t hwISIs; - uint32_t hwExternals; - uint32_t hwAlignments; - uint32_t hwPrograms; - uint32_t hwFloatPointUnavailable; - uint32_t hwDecrementers; - uint32_t hwIOErrors; - uint32_t hwSystemCalls; - uint32_t hwTraces; - uint32_t hwFloatingPointAssists; - uint32_t 
hwPerformanceMonitors; - uint32_t hwAltivecs; - uint32_t hwInstBreakpoints; - uint32_t hwSystemManagements; - uint32_t hwAltivecAssists; - uint32_t hwThermal; - uint32_t hwSoftPatches; - uint32_t hwMaintenances; - uint32_t hwInstrumentations; -} rupt_counters_t; - -kern_return_t chudxnu_get_cpu_rupt_counters(int cpu, rupt_counters_t *rupts); -kern_return_t chudxnu_clear_cpu_rupt_counters(int cpu); - -kern_return_t chudxnu_passup_alignment_exceptions(boolean_t enable); - -kern_return_t chudxnu_scom_read(uint32_t reg, uint64_t *data); -kern_return_t chudxnu_scom_write(uint32_t reg, uint64_t data); - -#pragma mark **** callbacks **** -// ******************************************************************************** -// callbacks -// ******************************************************************************** - -void chudxnu_cancel_all_callbacks(void); - -// cpu timer - each cpu has its own callback -typedef kern_return_t (*chudxnu_cpu_timer_callback_func_t)(thread_flavor_t flavor, thread_state_t tstate, mach_msg_type_number_t count); -kern_return_t chudxnu_cpu_timer_callback_enter(chudxnu_cpu_timer_callback_func_t func, uint32_t time, uint32_t units); // callback is entered on current cpu -kern_return_t chudxnu_cpu_timer_callback_cancel(void); // callback is cleared on current cpu -kern_return_t chudxnu_cpu_timer_callback_cancel_all(void); // callback is cleared on all cpus - -// trap callback - one callback for system -typedef kern_return_t (*chudxnu_trap_callback_func_t)(uint32_t trapentry, thread_flavor_t flavor, thread_state_t tstate, mach_msg_type_number_t count); -kern_return_t chudxnu_trap_callback_enter(chudxnu_trap_callback_func_t func); -kern_return_t chudxnu_trap_callback_cancel(void); - -// interrupt callback - one callback for system -typedef kern_return_t (*chudxnu_interrupt_callback_func_t)(uint32_t trapentry, thread_flavor_t flavor, thread_state_t tstate, mach_msg_type_number_t count); -kern_return_t 
chudxnu_interrupt_callback_enter(chudxnu_interrupt_callback_func_t func); -kern_return_t chudxnu_interrupt_callback_cancel(void); - -// ast callback - one callback for system -typedef kern_return_t (*chudxnu_perfmon_ast_callback_func_t)(thread_flavor_t flavor, thread_state_t tstate, mach_msg_type_number_t count); -kern_return_t chudxnu_perfmon_ast_callback_enter(chudxnu_perfmon_ast_callback_func_t func); -kern_return_t chudxnu_perfmon_ast_callback_cancel(void); -kern_return_t chudxnu_perfmon_ast_send(void); -kern_return_t chudxnu_perfmon_ast_send_urgent(boolean_t urgent); - -// cpusig callback - one callback for system -typedef kern_return_t (*chudxnu_cpusig_callback_func_t)(int request, thread_flavor_t flavor, thread_state_t tstate, mach_msg_type_number_t count); -kern_return_t chudxnu_cpusig_callback_enter(chudxnu_cpusig_callback_func_t func); -kern_return_t chudxnu_cpusig_callback_cancel(void); -kern_return_t chudxnu_cpusig_send(int otherCPU, uint32_t request); - -// kdebug callback - one callback for system -typedef kern_return_t (*chudxnu_kdebug_callback_func_t)(uint32_t debugid, uint32_t arg0, uint32_t arg1, uint32_t arg2, uint32_t arg3, uint32_t arg4); -kern_return_t chudxnu_kdebug_callback_enter(chudxnu_kdebug_callback_func_t func); -kern_return_t chudxnu_kdebug_callback_cancel(void); - -// timer callback - multiple callbacks -typedef kern_return_t (*chudxnu_timer_callback_func_t)(uint32_t param0, uint32_t param1); -typedef void * chud_timer_t; -chud_timer_t chudxnu_timer_alloc(chudxnu_timer_callback_func_t func, uint32_t param0); -kern_return_t chudxnu_timer_callback_enter(chud_timer_t timer, uint32_t param1, uint32_t time, uint32_t units); -kern_return_t chudxnu_timer_callback_cancel(chud_timer_t timer); -kern_return_t chudxnu_timer_free(chud_timer_t timer); - -// CHUD systemcall callback - one callback for system -typedef kern_return_t (*chudxnu_syscall_callback_func_t)(thread_flavor_t flavor, thread_state_t tstate, mach_msg_type_number_t count); 
-kern_return_t chudxnu_syscall_callback_enter(chudxnu_syscall_callback_func_t func); -kern_return_t chudxnu_syscall_callback_cancel(void); - -// ******************************************************************************** -// DEPRECATED -// ******************************************************************************** -kern_return_t chudxnu_bind_current_thread(int cpu); - -kern_return_t chudxnu_unbind_current_thread(void); - -kern_return_t chudxnu_current_thread_get_callstack(uint32_t *callStack, - mach_msg_type_number_t *count, - boolean_t user_only); - -thread_t chudxnu_current_act(void); - -// thread timer callback - one callback for system -typedef kern_return_t (*chudxnu_thread_timer_callback_func_t)(uint32_t param); -kern_return_t chudxnu_thread_timer_callback_enter(chudxnu_thread_timer_callback_func_t func, uint32_t param, uint32_t time, uint32_t units); -kern_return_t chudxnu_thread_timer_callback_cancel(void); - -#endif /* _PPC_CHUD_XNU_H_ */ diff --git a/osfmk/ppc/commpage/commpage.h b/osfmk/ppc/commpage/commpage.h index 9ba062f5d..486e34242 100644 --- a/osfmk/ppc/commpage/commpage.h +++ b/osfmk/ppc/commpage/commpage.h @@ -75,7 +75,7 @@ extern char *commPagePtr32; // virt address of 32-bit commpage in kernel map extern char *commPagePtr64; // virt address of 64-bit commpage in kernel map -extern void commpage_set_timestamp(uint64_t tbr,uint32_t secs,uint32_t usecs,uint32_t ticks_per_sec); +extern void commpage_set_timestamp(uint64_t tbr, uint64_t secs, uint32_t ticks_per_sec); extern int commpage_time_dcba( void ); #endif /* __ASSEMBLER__ */ diff --git a/osfmk/ppc/commpage/commpage_asm.s b/osfmk/ppc/commpage/commpage_asm.s index b372b979e..c3770a8fb 100644 --- a/osfmk/ppc/commpage/commpage_asm.s +++ b/osfmk/ppc/commpage/commpage_asm.s @@ -1,5 +1,5 @@ /* - * Copyright (c) 2003 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2003-2005 Apple Computer, Inc. All rights reserved. 
* * @APPLE_LICENSE_HEADER_START@ * @@ -74,7 +74,7 @@ Ldata: * *********************************************** * * Update the gettimeofday() shared data on the commpages, as follows: - * _COMM_PAGE_TIMESTAMP = a BSD-style pair of uint_32's for secs and usecs + * _COMM_PAGE_TIMESTAMP = the clock offset at timebase (seconds) * _COMM_PAGE_TIMEBASE = the timebase at which the timestamp was valid * _COMM_PAGE_SEC_PER_TICK = multiply timebase ticks by this to get seconds (double) * The convention is that if the timebase is 0, the data is invalid. Because other @@ -94,8 +94,8 @@ Ldata: * When called: * r3 = upper half of timebase (timebase is disabled if 0) * r4 = lower half of timebase - * r5 = seconds part of timestamp - * r6 = useconds part of timestamp + * r5 = upper half of timestamp + * r6 = lower half of timestamp * r7 = divisor (ie, timebase ticks per sec) * We set up: * r8 = ptr to our static data (kkBinary0, kkDouble1, kkTicksPerSec) @@ -106,7 +106,7 @@ Ldata: */ .align 5 -LEXT(commpage_set_timestamp) // void commpage_set_timestamp(tbr,secs,usecs,divisor) +LEXT(commpage_set_timestamp) // void commpage_set_timestamp(tbr,secs,divisor) mfmsr r11 // get MSR ori r2,r11,MASK(MSR_FP) // turn FP on mtmsr r2 @@ -155,7 +155,7 @@ LEXT(commpage_set_timestamp) // void commpage_set_timestamp(tbr,secs,usecs,di lfd f3,kkTicksPerSec(r8) // float new ticks_per_sec + 2**52 lfd f4,kkDouble1(r8) // f4 <- double(1.0) mffs f5 // save caller's FPSCR - mtfsfi 7,0 // clear Inexeact Exception bit, set round-to-nearest + mtfsfi 7,1 // clear Inexeact Exception bit, set round-to-zero fsub f3,f3,f2 // get ticks_per_sec fdiv f3,f4,f3 // divide 1 by ticks_per_sec to get SEC_PER_TICK stfd f3,_COMM_PAGE_SEC_PER_TICK(r9) diff --git a/osfmk/ppc/commpage/gettimeofday.s b/osfmk/ppc/commpage/gettimeofday.s index 6d17fd9ec..ada3d1481 100644 --- a/osfmk/ppc/commpage/gettimeofday.s +++ b/osfmk/ppc/commpage/gettimeofday.s @@ -1,5 +1,5 @@ /* - * Copyright (c) 2003 Apple Computer, Inc. All rights reserved. 
+ * Copyright (c) 2003-2005 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -26,9 +26,6 @@ #include #include -#define USEC_PER_SEC 1000000 - - /* The red zone is used to move data between GPRs and FPRs: */ #define rzTicks -8 // elapsed ticks since timestamp (double) @@ -48,7 +45,7 @@ // in user mode, usually without having to make a system call. We do not deal with // the timezone. The kernel maintains the following values in the comm page: // -// _COMM_PAGE_TIMESTAMP = a BSD-style pair of uint_32's for seconds and microseconds +// _COMM_PAGE_TIMESTAMP = 64 bit seconds timestamp // // _COMM_PAGE_TIMEBASE = the timebase at which the timestamp was valid // @@ -69,8 +66,7 @@ gettimeofday_32: // int gettimeofday(timeval *tp); 0: lwz r5,_COMM_PAGE_TIMEBASE+0(0) // r5,r6 = TBR at timestamp lwz r6,_COMM_PAGE_TIMEBASE+4(0) - lwz r7,_COMM_PAGE_TIMESTAMP+0(0) // r7 = timestamp seconds - lwz r8,_COMM_PAGE_TIMESTAMP+4(0) // r8 = timestamp microseconds + lwz r8,_COMM_PAGE_TIMESTAMP+4(0) // r8 = timestamp 32 bit seconds lfd f1,_COMM_PAGE_SEC_PER_TICK(0) 1: mftbu r10 // r10,r11 = current timebase @@ -84,31 +80,28 @@ gettimeofday_32: // int gettimeofday(timeval *tp); lwz r0,_COMM_PAGE_TIMEBASE+0(0) // then load data a 2nd time lwz r12,_COMM_PAGE_TIMEBASE+4(0) - lwz r2,_COMM_PAGE_TIMESTAMP+0(0) lwz r9,_COMM_PAGE_TIMESTAMP+4(0) cmplw cr6,r5,r0 // did we read a consistent set? cmplw cr7,r6,r12 beq- 3f // timestamp is disabled so return bad status - cmplw cr1,r2,r7 cmplw cr5,r9,r8 crand cr0_eq,cr6_eq,cr7_eq - crand cr1_eq,cr1_eq,cr5_eq - crand cr0_eq,cr0_eq,cr1_eq + crand cr0_eq,cr0_eq,cr5_eq bne- 0b // loop until we have a consistent set of data subfc r11,r6,r11 // compute ticks since timestamp lwz r9,_COMM_PAGE_2_TO_52(0) // get exponent for (2**52) subfe r10,r5,r10 // complete 64-bit subtract - lfd f2,_COMM_PAGE_2_TO_52(0) // f3 <- (2**52) + lfd f2,_COMM_PAGE_2_TO_52(0) // f2 <- (2**52) srwi. r0,r10,2 // if more than 2**34 ticks have elapsed... 
stw r11,rzTicks+4(r1) // store elapsed ticks into red zone or r10,r10,r9 // convert long-long in (r10,r11) into double bne- 3f // ...call kernel to reprime timestamp stw r10,rzTicks(r1) // complete double - lis r12,hi16(USEC_PER_SEC) - ori r12,r12,lo16(USEC_PER_SEC) - + + mffs f7 + mtfsfi 7,1 lfd f3,rzTicks(r1) // get elapsed ticks since timestamp + 2**52 fsub f4,f3,f2 // subtract 2**52 and normalize fmul f5,f4,f1 // f5 <- elapsed seconds since timestamp @@ -122,19 +115,14 @@ gettimeofday_32: // int gettimeofday(timeval *tp); fmul f6,f6,f3 // f6 <- fractional elapsed useconds fctiwz f6,f6 // convert useconds to integer stfd f6,rzUSeconds(r1) // store useconds into red zone + mtfsf 0xff,f7 lwz r5,rzSeconds+4(r1) // r5 <- seconds since timestamp - lwz r6,rzUSeconds+4(r1) // r6 <- useconds since timestamp - add r7,r7,r5 // add elapsed seconds to timestamp seconds - add r8,r8,r6 // ditto useconds + lwz r7,rzUSeconds+4(r1) // r7 <- useconds since timestamp + add r6,r8,r5 // add elapsed seconds to timestamp seconds - cmplw r8,r12 // r8 >= USEC_PER_SEC ? - blt 2f // no - addi r7,r7,1 // add 1 to secs - sub r8,r8,r12 // subtract USEC_PER_SEC from usecs -2: - stw r7,0(r3) // store secs//usecs into user's timeval - stw r8,4(r3) + stw r6,0(r3) // store secs//usecs into user's timeval + stw r7,4(r3) li r3,0 // return success blr 3: // too long since last timestamp or this code is disabled @@ -149,12 +137,12 @@ gettimeofday_32: // int gettimeofday(timeval *tp); // *************************************** // // This routine is called in 32-bit mode on 64-bit processors. A timeval is a struct of -// a long seconds and int useconds, so it's size depends on mode. +// a long seconds and int useconds, so its size depends on mode. 
gettimeofday_g5_32: // int gettimeofday(timeval *tp); 0: ld r6,_COMM_PAGE_TIMEBASE(0) // r6 = TBR at timestamp - ld r8,_COMM_PAGE_TIMESTAMP(0) // r8 = timestamp (seconds,useconds) + ld r8,_COMM_PAGE_TIMESTAMP(0) // r8 = timestamp (seconds) lfd f1,_COMM_PAGE_SEC_PER_TICK(0) mftb r10 // r10 = get current timebase lwsync // create a barrier if MP (patched to NOP if UP) @@ -172,6 +160,8 @@ gettimeofday_g5_32: // int gettimeofday(timeval *tp); std r11,rzTicks(r1) // put ticks in redzone where we can "lfd" it bne-- 3f // timestamp too old, so reprime + mffs f7 + mtfsfi 7,1 lfd f3,rzTicks(r1) // get elapsed ticks since timestamp (fixed pt) fcfid f4,f3 // float the tick count fmul f5,f4,f1 // f5 <- elapsed seconds since timestamp @@ -183,22 +173,14 @@ gettimeofday_g5_32: // int gettimeofday(timeval *tp); fmul f6,f6,f3 // f6 <- fractional elapsed useconds fctidz f6,f6 // convert useconds to fixed pt integer stfd f6,rzUSeconds(r1) // store useconds into red zone + mtfsf 0xff,f7 - lis r12,hi16(USEC_PER_SEC) // r12 <- 10**6 - srdi r7,r8,32 // extract seconds from doubleword timestamp lwz r5,rzSeconds+4(r1) // r5 <- seconds since timestamp - ori r12,r12,lo16(USEC_PER_SEC) - lwz r6,rzUSeconds+4(r1) // r6 <- useconds since timestamp - add r7,r7,r5 // add elapsed seconds to timestamp seconds - add r8,r8,r6 // ditto useconds + lwz r7,rzUSeconds+4(r1) // r7 <- useconds since timestamp + add r6,r8,r5 // add elapsed seconds to timestamp seconds - cmplw r8,r12 // r8 >= USEC_PER_SEC ? 
- blt 2f // no - addi r7,r7,1 // add 1 to secs - sub r8,r8,r12 // subtract USEC_PER_SEC from usecs -2: - stw r7,0(r3) // store secs//usecs into user's timeval - stw r8,4(r3) + stw r6,0(r3) // store secs//usecs into user's timeval + stw r7,4(r3) li r3,0 // return success blr 3: // too long since last timestamp or this code is disabled @@ -213,12 +195,12 @@ gettimeofday_g5_32: // int gettimeofday(timeval *tp); // *************************************** // // This routine is called in 64-bit mode on 64-bit processors. A timeval is a struct of -// a long seconds and int useconds, so it's size depends on mode. +// a long seconds and int useconds, so its size depends on mode. gettimeofday_g5_64: // int gettimeofday(timeval *tp); 0: ld r6,_COMM_PAGE_TIMEBASE(0) // r6 = TBR at timestamp - ld r8,_COMM_PAGE_TIMESTAMP(0) // r8 = timestamp (seconds,useconds) + ld r8,_COMM_PAGE_TIMESTAMP(0) // r8 = timestamp (seconds) lfd f1,_COMM_PAGE_SEC_PER_TICK(0) mftb r10 // r10 = get current timebase lwsync // create a barrier if MP (patched to NOP if UP) @@ -236,6 +218,8 @@ gettimeofday_g5_64: // int gettimeofday(timeval *tp); std r11,rzTicks(r1) // put ticks in redzone where we can "lfd" it bne-- 3f // timestamp too old, so reprime + mffs f7 + mtfsfi 7,1 lfd f3,rzTicks(r1) // get elapsed ticks since timestamp (fixed pt) fcfid f4,f3 // float the tick count fmul f5,f4,f1 // f5 <- elapsed seconds since timestamp @@ -247,22 +231,14 @@ gettimeofday_g5_64: // int gettimeofday(timeval *tp); fmul f6,f6,f3 // f6 <- fractional elapsed useconds fctidz f6,f6 // convert useconds to fixed pt integer stfd f6,rzUSeconds(r1) // store useconds into red zone + mtfsf 0xff,f7 - lis r12,hi16(USEC_PER_SEC) // r12 <- 10**6 - srdi r7,r8,32 // extract seconds from doubleword timestamp lwz r5,rzSeconds+4(r1) // r5 <- seconds since timestamp - ori r12,r12,lo16(USEC_PER_SEC) - lwz r6,rzUSeconds+4(r1) // r6 <- useconds since timestamp - add r7,r7,r5 // add elapsed seconds to timestamp seconds - add r8,r8,r6 // ditto 
useconds + lwz r7,rzUSeconds+4(r1) // r7 <- useconds since timestamp + add r6,r8,r5 // add elapsed seconds to timestamp seconds - cmplw r8,r12 // r8 >= USEC_PER_SEC ? - blt 2f // no - addi r7,r7,1 // add 1 to secs - sub r8,r8,r12 // subtract USEC_PER_SEC from usecs -2: - std r7,0(r3) // store secs//usecs into user's timeval - stw r8,8(r3) + std r6,0(r3) // store secs//usecs into user's timeval + stw r7,8(r3) li r3,0 // return success blr 3: // too long since last timestamp or this code is disabled diff --git a/osfmk/ppc/conf.c b/osfmk/ppc/conf.c index 585ef5b73..db8796ddb 100644 --- a/osfmk/ppc/conf.c +++ b/osfmk/ppc/conf.c @@ -71,10 +71,10 @@ extern struct clock_ops sysclk_ops, calend_ops; struct clock clock_list[] = { /* SYSTEM_CLOCK */ - { &sysclk_ops, 0, 0, 0 }, + { &sysclk_ops, 0, 0 }, /* CALENDAR_CLOCK */ - { &calend_ops, 0, 0, 0 }, + { &calend_ops, 0, 0 }, }; int clock_count = sizeof(clock_list) / sizeof(clock_list[0]); diff --git a/osfmk/ppc/cpu.c b/osfmk/ppc/cpu.c index cd42fdd12..5326e5dcf 100644 --- a/osfmk/ppc/cpu.c +++ b/osfmk/ppc/cpu.c @@ -31,6 +31,7 @@ #include #include #include +#include #include #include @@ -48,7 +49,6 @@ #include #include #include -#include #include decl_mutex_data(static,ppt_lock); @@ -65,9 +65,9 @@ static unsigned int rht_state = 0; decl_simple_lock_data(static,SignalReadyLock); struct SIGtimebase { - boolean_t avail; - boolean_t ready; - boolean_t done; + volatile boolean_t avail; + volatile boolean_t ready; + volatile boolean_t done; uint64_t abstime; }; @@ -120,8 +120,9 @@ cpu_init( mttbu(proc_info->save_tbu); mttb(proc_info->save_tbl); } - - setTimerReq(); /* Now that the time base is sort of correct, request the next timer pop */ + + proc_info->rtcPop = EndOfAllTime; /* forget any existing decrementer setting */ + etimer_resync_deadlines(); /* Now that the time base is sort of correct, request the next timer pop */ proc_info->cpu_type = CPU_TYPE_POWERPC; proc_info->cpu_subtype = (cpu_subtype_t)proc_info->pf.rptdProc; @@ 
-705,7 +706,7 @@ cpu_sync_timebase( (unsigned int)&syncClkSpot) != KERN_SUCCESS) continue; - while (*(volatile int *)&(syncClkSpot.avail) == FALSE) + while (syncClkSpot.avail == FALSE) continue; isync(); @@ -723,11 +724,10 @@ cpu_sync_timebase( syncClkSpot.ready = TRUE; - while (*(volatile int *)&(syncClkSpot.done) == FALSE) + while (syncClkSpot.done == FALSE) continue; - setTimerReq(); /* Start the timer */ - + etimer_resync_deadlines(); /* Start the timer */ (void)ml_set_interrupts_enabled(intr); } @@ -760,7 +760,8 @@ cpu_timebase_signal_handler( timebaseAddr->avail = TRUE; - while (*(volatile int *)&(timebaseAddr->ready) == FALSE); + while (timebaseAddr->ready == FALSE) + continue; if(proc_info->time_base_enable != (void(*)(cpu_id_t, boolean_t ))NULL) proc_info->time_base_enable(proc_info->cpu_id, TRUE); diff --git a/osfmk/ppc/cpu_capabilities.h b/osfmk/ppc/cpu_capabilities.h index 7c7539426..ddaf69688 100644 --- a/osfmk/ppc/cpu_capabilities.h +++ b/osfmk/ppc/cpu_capabilities.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2003 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2003-2006 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -91,6 +91,17 @@ static __inline__ int _NumCPUs( void ) { return (_cpu_capabilities & kNumCPUs) > #define _COMM_PAGE_AREA_LENGTH ( 7*4096) // reserved length of entire comm area #define _COMM_PAGE_AREA_USED ( 2*4096) // we use two pages so far +/* The following set of definitions are used in the kernel, which needs to distinguish between + * the 32 and 64-bit commpage addresses and lengths. On PPC they are the same, but on Intel + * they are not.
+ */ +#define _COMM_PAGE32_BASE_ADDRESS ( _COMM_PAGE_BASE_ADDRESS ) +#define _COMM_PAGE64_BASE_ADDRESS ( _COMM_PAGE_BASE_ADDRESS ) +#define _COMM_PAGE32_AREA_LENGTH ( _COMM_PAGE_AREA_LENGTH ) +#define _COMM_PAGE64_AREA_LENGTH ( _COMM_PAGE_AREA_LENGTH ) +#define _COMM_PAGE32_AREA_USED ( _COMM_PAGE_AREA_USED ) +#define _COMM_PAGE64_AREA_USED ( _COMM_PAGE_AREA_USED ) + /* The Objective-C runtime fixed address page to optimize message dispatch */ #define _OBJC_PAGE_BASE_ADDRESS (-20*4096) // start at page -20, ie 0xFFFEC000 diff --git a/osfmk/ppc/db_interface.c b/osfmk/ppc/db_interface.c index 55dda17cb..cfa40aa2c 100644 --- a/osfmk/ppc/db_interface.c +++ b/osfmk/ppc/db_interface.c @@ -585,15 +585,3 @@ void db_reboot( db_printf("Sorry, system can't reboot automatically yet... You need to do it by hand...\n"); } - -/* - * Switch to gdb - */ -void -db_to_gdb( - void) -{ - extern unsigned int switch_debugger; - - switch_debugger=1; -} diff --git a/osfmk/ppc/db_machdep.h b/osfmk/ppc/db_machdep.h index 2f42b9a8f..6037cc731 100644 --- a/osfmk/ppc/db_machdep.h +++ b/osfmk/ppc/db_machdep.h @@ -152,8 +152,6 @@ extern void db_low_trace( int have_addr, db_expr_t count, char *modif); -extern void db_to_gdb( - void); /* macros for printing OS server dependent task name */ diff --git a/osfmk/ppc/exception.h b/osfmk/ppc/exception.h index 41ccba26b..9c4ecc0ef 100644 --- a/osfmk/ppc/exception.h +++ b/osfmk/ppc/exception.h @@ -41,10 +41,10 @@ #include #include #include +#include #include #include #include -#include #include /* Per processor CPU features */ @@ -343,7 +343,7 @@ struct per_proc_info { /* PPC cache line boundary here - 140 */ void * pp_cbfr; void * pp_chud; - uint64_t rtclock_tick_deadline; + uint64_t rtclock_intr_deadline; rtclock_timer_t rtclock_timer; unsigned int ppbbTaskEnv; /* BlueBox Task Environment */ diff --git a/osfmk/ppc/hibernate_ppc.c b/osfmk/ppc/hibernate_ppc.c index 7bf11a53e..3dbf4a9d7 100644 --- a/osfmk/ppc/hibernate_ppc.c +++ b/osfmk/ppc/hibernate_ppc.c 
@@ -113,6 +113,14 @@ hibernate_page_list_setall_machine(hibernate_page_list_t * page_list, save_snapshot(); } +// mark pages not to be saved and not for scratch usage during restore +void +hibernate_page_list_set_volatile( hibernate_page_list_t * page_list, + hibernate_page_list_t * page_list_wired, + uint32_t * pagesOut) +{ +} + kern_return_t hibernate_processor_setup(IOHibernateImageHeader * header) { @@ -190,3 +198,7 @@ void ml_ppc_sleep(void) } } +void +hibernate_newruntime_map(void * map, vm_size_t map_size, uint32_t runtime_offset) +{ +} diff --git a/osfmk/ppc/hw_lock.s b/osfmk/ppc/hw_lock.s index 01b1bdced..e56f3f94c 100644 --- a/osfmk/ppc/hw_lock.s +++ b/osfmk/ppc/hw_lock.s @@ -32,6 +32,7 @@ #define WAIT_FLAG 0x02 #define WANT_UPGRADE 0x04 #define WANT_EXCL 0x08 +#define PRIV_EXCL 0x8000 #define TH_FN_OWNED 0x01 @@ -2004,8 +2005,10 @@ LEXT(lck_rw_lock_exclusive) .globl EXT(lock_write) LEXT(lock_write) #endif + lis r7,0xFFFF + ori r7,r7,(WANT_EXCL|WANT_UPGRADE|ILK_LOCKED) rwleloop: lwarx r5,RW_DATA,r3 ; Grab the lock value - rlwinm. r7,r5,30,1,31 ; Can we have it? + and. r8,r5,r7 ; Can we have it? ori r6,r5,WANT_EXCL ; Mark Exclusive bne-- rwlespin ; Branch if cannot be held stwcx. r6,RW_DATA,r3 ; Update lock word @@ -2038,14 +2041,21 @@ LEXT(lock_read) #endif rwlsloop: lwarx r5,RW_DATA,r3 ; Grab the lock value andi. r7,r5,WANT_EXCL|WANT_UPGRADE|ILK_LOCKED ; Can we have it? + bne-- rwlsopt ; Branch if cannot be held +rwlsloopres: addis r6,r5,1 ; Increment read cnt - bne-- rwlsspin ; Branch if cannot be held stwcx. r6,RW_DATA,r3 ; Update lock word bne-- rwlsloop .globl EXT(rwlsPatch_isync) LEXT(rwlsPatch_isync) isync blr +rwlsopt: + andi. r7,r5,PRIV_EXCL|ILK_LOCKED ; Can we have it? + bne-- rwlsspin ; Branch if cannot be held + lis r7,0xFFFF ; Get read cnt mask + and. r8,r5,r7 ; Is it shared + bne rwlsloopres ; Branch if can be held rwlsspin: li r4,lgKillResv ; Killing field stwcx. 
r4,0,r4 ; Kill it @@ -2192,8 +2202,9 @@ rwtlsloop: lwarx r5,RW_DATA,r3 ; Grab the lock value andi. r7,r5,ILK_LOCKED ; Test interlock flag bne-- rwtlsspin ; Branch if interlocked andi. r7,r5,WANT_EXCL|WANT_UPGRADE ; So, can we have it? + bne-- rwtlsopt ; Branch if held exclusive +rwtlsloopres: addis r6,r5,1 ; Increment read cnt - bne-- rwtlsfail ; Branch if held exclusive stwcx. r6,RW_DATA,r3 ; Update lock word bne-- rwtlsloop .globl EXT(rwtlsPatch_isync) @@ -2201,6 +2212,12 @@ LEXT(rwtlsPatch_isync) isync li r3,1 ; Return TRUE blr +rwtlsopt: + andi. r7,r5,PRIV_EXCL ; Can we have it? + bne-- rwtlsfail ; Branch if cannot be held + lis r7,0xFFFF ; Get read cnt mask + and. r8,r5,r7 ; Is it shared + bne rwtlsloopres ; Branch if can be held rwtlsfail: li r3,0 ; Return FALSE blr diff --git a/osfmk/ppc/hw_vm.s b/osfmk/ppc/hw_vm.s index 4984d1e39..27e805ae9 100644 --- a/osfmk/ppc/hw_vm.s +++ b/osfmk/ppc/hw_vm.s @@ -2868,17 +2868,19 @@ hwpSPrtPhy: cmplw r0,r0 ; Make sure we return CR0_EQ ; Function 2 - Set protection in mapping +; NOTE: Changes to no-execute permission are ignored + .set .,hwpOpBase+(2*128) ; Generate error if previous function too long hwpSPrtMap: lwz r9,mpFlags(r31) ; Get the mapping flags lwz r8,mpVAddr+4(r31) ; Get the protection part of mapping rlwinm. r9,r9,0,mpPermb,mpPermb ; Is the mapping permanent? - li r0,lo16(mpN|mpPP) ; Get no-execute and protection bits + li r0,lo16(mpPP) ; Get protection bits crnot cr0_eq,cr0_eq ; Change CR0_EQ to true if mapping is permanent - rlwinm r2,r25,0,mpNb-32,mpPPe-32 ; Isolate new no-execute and protection bits + rlwinm r2,r25,0,mpPP ; Isolate new protection bits beqlr-- ; Leave if permanent mapping (before we trash R5)... 
- andc r5,r5,r0 ; Clear the old no-execute and prot bits - or r5,r5,r2 ; Move in the new no-execute and prot bits + andc r5,r5,r0 ; Clear the old prot bits + or r5,r5,r2 ; Move in the new prot bits rlwimi r8,r5,0,20,31 ; Copy into the mapping copy cmpw r0,r0 ; Make sure we return CR0_EQ stw r8,mpVAddr+4(r31) ; Set the flag part of mapping diff --git a/osfmk/ppc/interrupt.c b/osfmk/ppc/interrupt.c index aba525045..b7bd02bbd 100644 --- a/osfmk/ppc/interrupt.c +++ b/osfmk/ppc/interrupt.c @@ -29,6 +29,8 @@ #include #include #include +#include +#include #include #include #include @@ -103,7 +105,7 @@ struct savearea * interrupt( } } - rtclock_intr(ssp); + etimer_intr(USER_MODE(ssp->save_srr1), ssp->save_srr0); /* Handle event timer */ break; case T_INTERRUPT: diff --git a/osfmk/ppc/io_map.c b/osfmk/ppc/io_map.c index 3c47658ec..373c3965f 100644 --- a/osfmk/ppc/io_map.c +++ b/osfmk/ppc/io_map.c @@ -46,15 +46,14 @@ extern vm_offset_t virtual_avail; * Note, this will onl */ vm_offset_t -io_map(phys_addr, size) - vm_offset_t phys_addr; - vm_size_t size; +io_map(vm_offset_t phys_addr, vm_size_t size, unsigned int flags) { vm_offset_t start; int i; - unsigned int j; + unsigned int j, mflags; vm_page_t m; + mflags = mmFlgBlock | mmFlgUseAttr | (flags & VM_MEM_GUARDED) | ((flags & VM_MEM_NOT_CACHEABLE) >> 1); /* Convert to our mapping_make flags */ #if DEBUG assert (kernel_map != VM_MAP_NULL); /* VM must be initialised */ @@ -67,7 +66,7 @@ io_map(phys_addr, size) (void) kmem_alloc_pageable(kernel_map, &start, size); /* Get some virtual addresses to use */ (void)mapping_make(kernel_pmap, (addr64_t)start, (ppnum_t)(phys_addr >> 12), - (mmFlgBlock | mmFlgUseAttr | mmFlgCInhib | mmFlgGuarded), /* Map as I/O page */ + mflags, /* Map with requested cache mode */ (size >> 12), VM_PROT_READ|VM_PROT_WRITE); return (start + (phys_addr & PAGE_MASK)); /* Pass back the physical address */ @@ -87,7 +86,7 @@ io_map(phys_addr, size) (void)mapping_make(kernel_pmap, (addr64_t)(start + i), 
m->phys_page, - (mmFlgBlock | mmFlgUseAttr | mmFlgCInhib | mmFlgGuarded), /* Map as I/O page */ + mflags, /* Map with requested cache mode */ 1, VM_PROT_READ|VM_PROT_WRITE); } @@ -102,21 +101,24 @@ io_map(phys_addr, size) * Allocate and map memory for devices before the VM system comes alive. */ -vm_offset_t io_map_spec(vm_offset_t phys_addr, vm_size_t size) +vm_offset_t io_map_spec(vm_offset_t phys_addr, vm_size_t size, unsigned int flags) { vm_offset_t start; + unsigned int mflags; if(kernel_map != VM_MAP_NULL) { /* If VM system is up, redirect to normal routine */ - return io_map(phys_addr, size); /* Map the address */ + return io_map(phys_addr, size, flags); /* Map the address */ } + + mflags = mmFlgBlock | mmFlgUseAttr | (flags & VM_MEM_GUARDED) | ((flags & VM_MEM_NOT_CACHEABLE) >> 1); /* Convert to our mapping_make flags */ size = round_page(size + (phys_addr - (phys_addr & -PAGE_SIZE))); /* Extend the length to include it all */ start = pmap_boot_map(size); /* Get me some virtual address */ (void)mapping_make(kernel_pmap, (addr64_t)start, (ppnum_t)(phys_addr >> 12), - (mmFlgBlock | mmFlgUseAttr | mmFlgCInhib | mmFlgGuarded), /* Map as I/O page */ + mflags, /* Map with requested cache mode */ (size >> 12), VM_PROT_READ|VM_PROT_WRITE); return (start + (phys_addr & PAGE_MASK)); diff --git a/osfmk/ppc/io_map_entries.h b/osfmk/ppc/io_map_entries.h index afdf4ac1c..45b25db4e 100644 --- a/osfmk/ppc/io_map_entries.h +++ b/osfmk/ppc/io_map_entries.h @@ -30,8 +30,9 @@ extern vm_offset_t io_map( vm_offset_t phys_addr, - vm_size_t size); -extern vm_offset_t io_map_spec(vm_offset_t phys_addr, vm_size_t size); + vm_size_t size, + unsigned int flags); +extern vm_offset_t io_map_spec(vm_offset_t phys_addr, vm_size_t size, unsigned int flags); #endif /* _PPC_IO_MAP_ENTRIES_H_ */ diff --git a/osfmk/ppc/locks.h b/osfmk/ppc/locks.h index ab878b6d4..d8fa8a14a 100644 --- a/osfmk/ppc/locks.h +++ b/osfmk/ppc/locks.h @@ -134,7 +134,8 @@ typedef struct { union { struct { unsigned int 
lck_rwd_shared_cnt:16, /* No. of shared granted request */ - lck_rwd_pad16:12, /* padding */ + lck_rwd_priv_excl:1, /* priority for Writer */ + lck_rwd_pad17:11, /* padding */ lck_rwd_want_excl:1, /* Writer is waiting, or locked for write */ lck_rwd_want_upgrade:1, /* Read-to-write upgrade waiting */ lck_rwd_waiting:1, /* Someone is sleeping on lock */ @@ -154,6 +155,7 @@ typedef struct { #define lck_rw_want_upgrade lck_rw_sw.lck_rwd.lck_rwd_want_upgrade #define lck_rw_want_excl lck_rw_sw.lck_rwd.lck_rwd_want_excl #define lck_rw_waiting lck_rw_sw.lck_rwd.lck_rwd_waiting +#define lck_rw_priv_excl lck_rw_sw.lck_rwd.lck_rwd_priv_excl #define lck_rw_shared_cnt lck_rw_sw.lck_rwd.lck_rwd_shared_cnt #define lck_rw_tag lck_rw_sw.lck_rwi.lck_rwi_tag diff --git a/osfmk/ppc/locks_ppc.c b/osfmk/ppc/locks_ppc.c index 4ea067e9d..2fad8c38d 100644 --- a/osfmk/ppc/locks_ppc.c +++ b/osfmk/ppc/locks_ppc.c @@ -704,6 +704,9 @@ lock_init( #if MACH_LDEBUG lck->lck_rw_deb.type = RW_TAG; lck->lck_rw_attr |= (LCK_RW_ATTR_DEBUG|LCK_RW_ATTR_DIS_THREAD|LCK_RW_ATTR_DIS_MYLOCK); + lck->lck_rw.lck_rw_priv_excl = TRUE; +#else + lck->lck_rw_priv_excl = TRUE; #endif } @@ -810,6 +813,10 @@ lck_rw_init( } } else { (void) memset((void *) lck, 0, sizeof(lck_rw_t)); + if ((lck_attr->lck_attr_val) & LCK_ATTR_RW_SHARED_PRIORITY) + lck->lck_rw_priv_excl = FALSE; + else + lck->lck_rw_priv_excl = TRUE; } lck_grp_reference(grp); @@ -826,6 +833,10 @@ lck_rw_ext_init( lck_attr_t *attr) { bzero((void *)lck, sizeof(lck_rw_ext_t)); + if ((attr->lck_attr_val) & LCK_ATTR_RW_SHARED_PRIORITY) + lck->lck_rw.lck_rw_priv_excl = FALSE; + else + lck->lck_rw.lck_rw_priv_excl = TRUE; if ((attr->lck_attr_val) & LCK_ATTR_DEBUG) { lck->lck_rw_deb.type = RW_TAG; @@ -1083,7 +1094,8 @@ lck_rw_lock_shared_gen( lck_rw_ilk_lock(lck); - while (lck->lck_rw_want_excl || lck->lck_rw_want_upgrade) { + while ((lck->lck_rw_want_excl || lck->lck_rw_want_upgrade) && + ((lck->lck_rw_shared_cnt == 0) || (lck->lck_rw_priv_excl))) { i = 
lock_wait_time[1]; KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SHARED_CODE) | DBG_FUNC_START, @@ -1091,12 +1103,15 @@ lck_rw_lock_shared_gen( if (i != 0) { lck_rw_ilk_unlock(lck); - while (--i != 0 && (lck->lck_rw_want_excl || lck->lck_rw_want_upgrade)) + while (--i != 0 && + (lck->lck_rw_want_excl || lck->lck_rw_want_upgrade) && + ((lck->lck_rw_shared_cnt == 0) || (lck->lck_rw_priv_excl))) continue; lck_rw_ilk_lock(lck); } - if (lck->lck_rw_want_excl || lck->lck_rw_want_upgrade) { + if ((lck->lck_rw_want_excl || lck->lck_rw_want_upgrade) && + ((lck->lck_rw_shared_cnt == 0) || (lck->lck_rw_priv_excl))) { lck->lck_rw_waiting = TRUE; res = assert_wait((event_t)(((unsigned int*)lck)+((sizeof(lck_rw_t)-1)/sizeof(unsigned int))), THREAD_UNINT); if (res == THREAD_WAITING) { @@ -1273,7 +1288,8 @@ lck_rw_try_lock_shared_gen( { lck_rw_ilk_lock(lck); - if (lck->lck_rw_want_excl || lck->lck_rw_want_upgrade) { + if ((lck->lck_rw_want_excl || lck->lck_rw_want_upgrade) && + ((lck->lck_rw_shared_cnt == 0) || (lck->lck_rw_priv_excl))) { lck_rw_ilk_unlock(lck); return(FALSE); } @@ -1509,7 +1525,8 @@ lck_rw_lock_shared_ext( if (lock_stat) lck->lck_rw_grp->lck_grp_stat.lck_grp_rw_stat.lck_grp_rw_util_cnt++; - while (lck->lck_rw.lck_rw_want_excl || lck->lck_rw.lck_rw_want_upgrade) { + while ((lck->lck_rw.lck_rw_want_excl || lck->lck_rw.lck_rw_want_upgrade) && + ((lck->lck_rw.lck_rw_shared_cnt == 0) || (lck->lck_rw.lck_rw_priv_excl))) { i = lock_wait_time[1]; KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SHARED_CODE) | DBG_FUNC_START, @@ -1522,12 +1539,15 @@ lck_rw_lock_shared_ext( if (i != 0) { lck_rw_ilk_unlock(&lck->lck_rw); - while (--i != 0 && (lck->lck_rw.lck_rw_want_excl || lck->lck_rw.lck_rw_want_upgrade)) + while (--i != 0 && + (lck->lck_rw.lck_rw_want_excl || lck->lck_rw.lck_rw_want_upgrade) && + ((lck->lck_rw.lck_rw_shared_cnt == 0) || (lck->lck_rw.lck_rw_priv_excl))) continue; lck_rw_ilk_lock(&lck->lck_rw); } - if (lck->lck_rw.lck_rw_want_excl || 
lck->lck_rw.lck_rw_want_upgrade) { + if ((lck->lck_rw.lck_rw_want_excl || lck->lck_rw.lck_rw_want_upgrade) && + ((lck->lck_rw.lck_rw_shared_cnt == 0) || (lck->lck_rw.lck_rw_priv_excl))) { lck->lck_rw.lck_rw_waiting = TRUE; res = assert_wait((event_t)(((unsigned int*)rlck)+((sizeof(lck_rw_t)-1)/sizeof(unsigned int))), THREAD_UNINT); if (res == THREAD_WAITING) { @@ -1784,7 +1804,8 @@ lck_rw_try_lock_shared_ext( if (lock_stat) lck->lck_rw_grp->lck_grp_stat.lck_grp_rw_stat.lck_grp_rw_util_cnt++; - if (lck->lck_rw.lck_rw_want_excl || lck->lck_rw.lck_rw_want_upgrade) { + if ((lck->lck_rw.lck_rw_want_excl || lck->lck_rw.lck_rw_want_upgrade) && + ((lck->lck_rw.lck_rw_shared_cnt == 0) || (lck->lck_rw.lck_rw_priv_excl))) { if (lock_stat) { lck->lck_rw_grp->lck_grp_stat.lck_grp_rw_stat.lck_grp_rw_miss_cnt++; } diff --git a/osfmk/ppc/lowglobals.h b/osfmk/ppc/lowglobals.h index debb37c22..861d9034a 100644 --- a/osfmk/ppc/lowglobals.h +++ b/osfmk/ppc/lowglobals.h @@ -80,8 +80,12 @@ typedef struct lowglo { unsigned int lgRsv380[32]; /* 5380 - 5400 reserved */ unsigned int lgRsv400[32]; /* 5400 - 5480 reserved */ - - uint32_t lgRsv480[704]; /* 5480 reserved - push to 1 page */ + uint32_t lgKmodptr; /* 0x5480 Pointer to kmod, debugging aid */ + uint32_t lgTransOff; /* 0x5484 Pointer to kdp_trans_off, debugging aid */ + uint32_t lgReadIO; /* 0x5488 Pointer to kdp_read_io, debugging aid */ + uint32_t lgDevSlot1; /* 0x548C For developer use */ + uint32_t lgDevSlot2; /* 0x5490 For developer use */ + uint32_t lgRsv494[731]; /* 0x5494 reserved - push to 1 page */ } lowglo; diff --git a/osfmk/ppc/lowmem_vectors.s b/osfmk/ppc/lowmem_vectors.s index 1a14e5d94..3ee46a474 100644 --- a/osfmk/ppc/lowmem_vectors.s +++ b/osfmk/ppc/lowmem_vectors.s @@ -1548,7 +1548,7 @@ noPerfMonSave32: lwz r25,traceMask(0) ; Get the trace mask li r0,SAVgeneral ; Get the savearea type value lhz r19,PP_CPU_NUMBER(r2) ; Get the logical processor number - rlwinm r22,r11,30,0,31 ; Divide interrupt code by 2 + rlwinm 
r22,r11,30,0,31 ; Divide interrupt code by 4 stb r0,SAVflags+2(r13) ; Mark valid context addi r22,r22,10 ; Adjust code so we shift into CR5 li r23,trcWork ; Get the trace work area address @@ -3959,7 +3959,12 @@ EXT(killresv): .long 0 ; 5470 reserved .long 0 ; 5474 reserved .long 0 ; 5478 reserved - .long 0 ; 547C reserved + .long 0 ; 547C reserved + .long EXT(kmod) ; 5480 Pointer to kmod, debugging aid + .long EXT(kdp_trans_off) ; 5484 Pointer to kdp_trans_off, debugging aid + .long EXT(kdp_read_io) ; 5488 Pointer to kdp_read_io, debugging aid + .long 0 ; 548C Reserved for developer use + .long 0 ; 5490 Reserved for developer use ; ; The "shared page" is used for low-level debugging ; diff --git a/osfmk/ppc/machine_cpu.h b/osfmk/ppc/machine_cpu.h index 438dfd533..13967afec 100644 --- a/osfmk/ppc/machine_cpu.h +++ b/osfmk/ppc/machine_cpu.h @@ -40,4 +40,6 @@ typedef void (*broadcastFunc) (uint32_t); int32_t cpu_broadcast(uint32_t *, broadcastFunc, uint32_t); +#define cpu_pause() /* Not for this architecture */ + #endif /* _PPC_MACHINE_CPU_H_ */ diff --git a/osfmk/ppc/machine_routines.c b/osfmk/ppc/machine_routines.c index 749446f77..bcfaf79e2 100644 --- a/osfmk/ppc/machine_routines.c +++ b/osfmk/ppc/machine_routines.c @@ -39,7 +39,7 @@ #include -unsigned int LockTimeOut = 12500000; +unsigned int LockTimeOut = 1250000000; unsigned int MutexSpin = 0; decl_mutex_data(static,mcpus_lock); @@ -142,9 +142,17 @@ ml_io_map( vm_offset_t phys_addr, vm_size_t size) { - return(io_map(phys_addr,size)); + return(io_map(phys_addr,size,VM_WIMG_IO)); } + +void ml_get_bouncepool_info(vm_offset_t *phys_addr, vm_size_t *size) +{ + *phys_addr = 0; + *size = 0; +} + + /* * Routine: ml_static_malloc * Function: static memory allocation diff --git a/osfmk/ppc/machine_routines.h b/osfmk/ppc/machine_routines.h index 02850ad89..46c834146 100644 --- a/osfmk/ppc/machine_routines.h +++ b/osfmk/ppc/machine_routines.h @@ -212,6 +212,11 @@ extern vm_offset_t ml_io_map( vm_offset_t phys_addr, 
vm_size_t size); +void ml_get_bouncepool_info( + vm_offset_t *phys_addr, + vm_size_t *size); + + /* boot memory allocation */ extern vm_offset_t ml_static_malloc( vm_size_t size); diff --git a/osfmk/ppc/machine_routines_asm.s b/osfmk/ppc/machine_routines_asm.s index 15a93612f..e7c5916ba 100644 --- a/osfmk/ppc/machine_routines_asm.s +++ b/osfmk/ppc/machine_routines_asm.s @@ -1969,19 +1969,6 @@ LEXT(current_act) mfsprg r3,1 blr - - .align 5 - .globl EXT(clock_get_uptime) -LEXT(clock_get_uptime) -1: mftbu r9 - mftb r0 - mftbu r11 - cmpw r11,r9 - bne-- 1b - stw r0,4(r3) - stw r9,0(r3) - blr - .align 5 .globl EXT(mach_absolute_time) diff --git a/osfmk/ppc/mappings.c b/osfmk/ppc/mappings.c index f5138334f..e92ce2469 100644 --- a/osfmk/ppc/mappings.c +++ b/osfmk/ppc/mappings.c @@ -77,6 +77,8 @@ extern unsigned int DebugWork; /* (BRINGUP) */ void mapping_verify(void); void mapping_phys_unused(ppnum_t pa); +int nx_enabled = 0; /* enable no-execute protection */ + /* * ppc_prot translates Mach's representation of protections to that of the PPC hardware. * For Virtual Machines (VMM), we also provide translation entries where the output is @@ -85,15 +87,25 @@ void mapping_phys_unused(ppnum_t pa); * 8 table entries; direct translations are placed in the range 8..16, so they fall into * the second half of the table. * - * ***NOTE*** I've commented out the Mach->PPC translations that would set page-level - * no-execute, pending updates to the VM layer that will properly enable its - * use. 
Bob Abeles 08.02.04 */ -//unsigned char ppc_prot[16] = { 4, 7, 6, 6, 3, 3, 2, 2, /* Mach -> PPC translations */ -unsigned char ppc_prot[16] = { 0, 3, 2, 2, 3, 3, 2, 2, /* Mach -> PPC translations */ +unsigned char ppc_prot[16] = { 4, 7, 6, 6, 3, 3, 2, 2, /* Mach -> PPC translations */ 0, 1, 2, 3, 4, 5, 6, 7 }; /* VMM direct translations */ + + +vm_prot_t getProtPPC(int key, boolean_t disable_NX) { + vm_prot_t prot; + + prot = ppc_prot[key & 0xF]; + + if (key <= 7 && disable_NX == TRUE) + prot &= ~mpN; + + return (prot); +} + + /* * About PPC VSID generation: * @@ -296,6 +308,7 @@ addr64_t mapping_make(pmap_t pmap, addr64_t va, ppnum_t pa, unsigned int flags, unsigned int pindex, mflags, pattr, wimg, rc; phys_entry_t *physent; int nlists, pcf; + boolean_t disable_NX = FALSE; pindex = 0; @@ -366,10 +379,12 @@ addr64_t mapping_make(pmap_t pmap, addr64_t va, ppnum_t pa, unsigned int flags, mp->u.mpBSize = size; /* Set the size */ mp->mpPte = 0; /* Set the PTE invalid */ mp->mpPAddr = pa; /* Set the physical page number */ - mp->mpVAddr = (va & ~mpHWFlags) | (wimg << 3) /* Add the protection and attributes to the field */ - | ((PerProcTable[0].ppe_vaddr->pf.Available & pf64Bit)? - getProtPPC(prot) : (getProtPPC(prot) & 0x3)); /* Mask off no-execute control for 32-bit machines */ - + + if ( !nx_enabled || (pmap->pmapFlags & pmapNXdisabled) ) + disable_NX = TRUE; + + mp->mpVAddr = (va & ~mpHWFlags) | (wimg << 3) | getProtPPC(prot, disable_NX); /* Add the protection and attributes to the field */ + while(1) { /* Keep trying... 
*/ colladdr = hw_add_map(pmap, mp); /* Go add the mapping to the pmap */ rc = colladdr & mapRetCode; /* Separate return code */ @@ -476,8 +491,12 @@ void mapping_protect(pmap_t pmap, addr64_t va, vm_prot_t prot, addr64_t *nextva) { /* Change protection of a virtual page */ int ret; - - ret = hw_protect(pmap, va, getProtPPC(prot), nextva); /* Try to change the protect here */ + boolean_t disable_NX = FALSE; + + if ( !nx_enabled || (pmap->pmapFlags & pmapNXdisabled) ) + disable_NX = TRUE; + + ret = hw_protect(pmap, va, getProtPPC(prot, disable_NX), nextva); /* Try to change the protect here */ switch (ret) { /* Decode return code */ @@ -499,8 +518,8 @@ mapping_protect(pmap_t pmap, addr64_t va, vm_prot_t prot, addr64_t *nextva) { /* * * This routine takes a physical entry and runs through all mappings attached to it and changes * the protection. If there are PTEs associated with the mappings, they will be invalidated before - * the protection is changed. There is no limitation on changes, e.g., - * higher to lower, lower to higher. + * the protection is changed. There is no limitation on changes, e.g., higher to lower, lower to + * higher; however, changes to execute protection are ignored. * * Any mapping that is marked permanent is not changed * @@ -511,16 +530,16 @@ void mapping_protect_phys(ppnum_t pa, vm_prot_t prot) { /* Change protection of unsigned int pindex; phys_entry_t *physent; - + physent = mapping_phys_lookup(pa, &pindex); /* Get physical entry */ if(!physent) { /* Did we find the physical page? */ panic("mapping_protect_phys: invalid physical page %08X\n", pa); } hw_walk_phys(physent, hwpNoop, hwpSPrtMap, hwpNoop, - getProtPPC(prot), hwpPurgePTE); /* Set the new protection for page and mappings */ + getProtPPC(prot, FALSE), hwpPurgePTE); /* Set the new protection for page and mappings */ - return; /* Leave... */ + return; /* Leave... 
*/ } @@ -1493,52 +1512,18 @@ addr64_t mapping_p2v(pmap_t pmap, ppnum_t pa) { /* Finds first virtual mappin } -/* - * phystokv(addr) - * - * Convert a physical address to a kernel virtual address if - * there is a mapping, otherwise return NULL - */ - -vm_offset_t phystokv(vm_offset_t pa) { - - addr64_t va; - ppnum_t pp; - - pp = pa >> 12; /* Convert to a page number */ - - if(!(va = mapping_p2v(kernel_pmap, pp))) { - return 0; /* Can't find it, return 0... */ - } - - return (va | (pa & (PAGE_SIZE - 1))); /* Build and return VADDR... */ - -} /* * kvtophys(addr) * * Convert a kernel virtual address to a physical address */ -vm_offset_t kvtophys(vm_offset_t va) { +addr64_t kvtophys(vm_offset_t va) { return pmap_extract(kernel_pmap, va); /* Find mapping and lock the physical entry for this mapping */ } -/* - * kvtophys64(addr) - * - * Convert a kernel virtual address to a 64-bit physical address - */ -vm_map_offset_t kvtophys64(vm_map_offset_t va) { - ppnum_t pa = pmap_find_phys(kernel_pmap, (addr64_t)va); - - if (!pa) - return (vm_map_offset_t)0; - return (((vm_map_offset_t)pa) << 12) | (va & 0xfff); -} - /* * void ignore_zero_fault(boolean_t) - Sets up to ignore or honor any fault on * page 0 access for the current thread. @@ -1556,6 +1541,13 @@ void ignore_zero_fault(boolean_t type) { /* Sets up to ignore or honor any fa return; /* Return the result or 0... */ } +/* + * nop in current ppc implementation + */ +void inval_copy_windows(__unused thread_t t) +{ +} + /* * Copies data between a physical page and a virtual page, or 2 physical. 
This is used to diff --git a/osfmk/ppc/mappings.h b/osfmk/ppc/mappings.h index 6d910c3de..69cbe756d 100644 --- a/osfmk/ppc/mappings.h +++ b/osfmk/ppc/mappings.h @@ -365,7 +365,7 @@ extern mappingctl_t mapCtl; /* Mapping allocation control */ extern unsigned char ppc_prot[]; /* Mach -> PPC protection translation table */ -#define getProtPPC(__key) (ppc_prot[(__key) & 0xF]) +vm_prot_t getProtPPC(int, boolean_t); /* Safe Mach -> PPC protection key conversion */ extern addr64_t mapping_remove(pmap_t pmap, addr64_t va); /* Remove a single mapping for this VADDR */ diff --git a/osfmk/ppc/pmap.c b/osfmk/ppc/pmap.c index 875ee6912..ce05767b9 100644 --- a/osfmk/ppc/pmap.c +++ b/osfmk/ppc/pmap.c @@ -231,8 +231,14 @@ pmap_map( vm_offset_t va, vm_offset_t spa, vm_offset_t epa, - vm_prot_t prot) + vm_prot_t prot, + unsigned int flags) { + unsigned int mflags; + mflags = 0; /* Make sure this is initialized to nothing special */ + if(!(flags & VM_WIMG_USE_DEFAULT)) { /* Are they supplying the attributes? */ + mflags = mmFlgUseAttr | (flags & VM_MEM_GUARDED) | ((flags & VM_MEM_NOT_CACHEABLE) >> 1); /* Convert to our mapping_make flags */ + } addr64_t colladr; @@ -240,7 +246,8 @@ pmap_map( assert(epa > spa); - colladr = mapping_make(kernel_pmap, (addr64_t)va, (ppnum_t)(spa >> 12), (mmFlgBlock | mmFlgPerm), (epa - spa) >> 12, prot & VM_PROT_ALL); + colladr = mapping_make(kernel_pmap, (addr64_t)va, (ppnum_t)(spa >> 12), + (mmFlgBlock | mmFlgPerm), (epa - spa) >> 12, (prot & VM_PROT_ALL) ); if(colladr) { /* Was something already mapped in the range? 
*/ panic("pmap_map: attempt to map previously mapped range - va = %08X, pa = %08X, epa = %08X, collision = %016llX\n", @@ -357,6 +364,7 @@ pmap_bootstrap(uint64_t msize, vm_offset_t *first_avail, unsigned int kmapsize) kernel_pmap->pmap_link.prev = (queue_t)kernel_pmap; /* Set up anchor reverse */ kernel_pmap->ref_count = 1; kernel_pmap->pmapFlags = pmapKeyDef; /* Set the default keys */ + kernel_pmap->pmapFlags |= pmapNXdisabled; kernel_pmap->pmapCCtl = pmapCCtlVal; /* Initialize cache control */ kernel_pmap->space = PPC_SID_KERNEL; kernel_pmap->pmapvr = 0; /* Virtual = Real */ @@ -525,7 +533,7 @@ pmap_bootstrap(uint64_t msize, vm_offset_t *first_avail, unsigned int kmapsize) /* Map V=R the page tables */ pmap_map(first_used_addr, first_used_addr, - round_page(first_used_addr + size), VM_PROT_READ | VM_PROT_WRITE); + round_page(first_used_addr + size), VM_PROT_READ | VM_PROT_WRITE, VM_WIMG_USE_DEFAULT); *first_avail = round_page(first_used_addr + size); /* Set next available page */ first_free_virt = *first_avail; /* Ditto */ @@ -648,7 +656,7 @@ void pmap_virtual_space( * only, and is bounded by that size. */ pmap_t -pmap_create(vm_map_size_t size) +pmap_create(vm_map_size_t size, __unused boolean_t is_64bit) { pmap_t pmap, ckpmap, fore; int s; @@ -936,7 +944,7 @@ pmap_page_protect( mapping_t *mp; - switch (prot) { + switch (prot & VM_PROT_ALL) { case VM_PROT_READ: case VM_PROT_READ|VM_PROT_EXECUTE: remove = FALSE; @@ -983,7 +991,7 @@ pmap_page_protect( * physical page. */ - mapping_protect_phys(pa, prot & VM_PROT_ALL); /* Change protection of all mappings to page. */ + mapping_protect_phys(pa, (prot & VM_PROT_ALL) ); /* Change protection of all mappings to page. 
*/ } @@ -1061,7 +1069,7 @@ void pmap_protect( endva = eva & -4096LL; /* Round end down to a page */ while(1) { /* Go until we finish the range */ - mapping_protect(pmap, va, prot & VM_PROT_ALL, &va); /* Change the protection and see what's next */ + mapping_protect(pmap, va, (prot & VM_PROT_ALL), &va); /* Change the protection and see what's next */ if((va == 0) || (va >= endva)) break; /* End loop if we finish range or run off the end */ } @@ -1102,7 +1110,7 @@ pmap_enter(pmap_t pmap, vm_map_offset_t va, ppnum_t pa, vm_prot_t prot, while(1) { /* Keep trying the enter until it goes in */ - colva = mapping_make(pmap, va, pa, mflags, 1, prot & VM_PROT_ALL); /* Enter the mapping into the pmap */ + colva = mapping_make(pmap, va, pa, mflags, 1, (prot & VM_PROT_ALL) ); /* Enter the mapping into the pmap */ if(!colva) break; /* If there were no collisions, we are done... */ @@ -1287,6 +1295,29 @@ pmap_attribute( } + + +unsigned int pmap_cache_attributes(ppnum_t pgn) { + + unsigned int flags; + struct phys_entry * pp; + + // Find physical address + if ((pp = pmap_find_physentry(pgn))) { + // Use physical attributes as default + // NOTE: DEVICE_PAGER_FLAGS are made to line up + flags = VM_MEM_COHERENT; /* We only support coherent memory */ + if (pp->ppLink & ppG) flags |= VM_MEM_GUARDED; /* Add in guarded if it is */ + if (pp->ppLink & ppI) flags |= VM_MEM_NOT_CACHEABLE; /* Add in cache inhibited if so */ + } else + // If no physical, just hard code attributes + flags = VM_WIMG_IO; + + return (flags); +} + + + /* * pmap_attribute_cache_sync(vm_offset_t pa) * @@ -1956,7 +1987,7 @@ void pmap_init_sharedpage(vm_offset_t cpg){ addr64_t cva, cpoff; ppnum_t cpphys; - sharedPmap = pmap_create(0); /* Get a pmap to hold the common segment */ + sharedPmap = pmap_create(0, FALSE); /* Get a pmap to hold the common segment */ if(!sharedPmap) { /* Check for errors */ panic("pmap_init_sharedpage: couldn't make sharedPmap\n"); } @@ -1969,7 +2000,7 @@ void pmap_init_sharedpage(vm_offset_t 
cpg){ } cva = mapping_make(sharedPmap, (addr64_t)((uint32_t)_COMM_PAGE_BASE_ADDRESS) + cpoff, - cpphys, mmFlgPerm, 1, VM_PROT_READ); /* Map the page read only */ + cpphys, mmFlgPerm, 1, VM_PROT_READ | VM_PROT_EXECUTE); /* Map the page read/execute only */ if(cva) { /* Check for errors */ panic("pmap_init_sharedpage: couldn't map commpage page - cva = %016llX\n", cva); } @@ -2056,3 +2087,13 @@ coredumpok( { return TRUE; } + + +/* + * disable no-execute capability on + * the specified pmap + */ +void pmap_disable_NX(pmap_t pmap) { + + pmap->pmapFlags |= pmapNXdisabled; +} diff --git a/osfmk/ppc/pmap.h b/osfmk/ppc/pmap.h index 3302ca747..76ff6cd34 100644 --- a/osfmk/ppc/pmap.h +++ b/osfmk/ppc/pmap.h @@ -160,6 +160,7 @@ struct pmap { #define pmapKeyDef 0x00000006 /* Default keys - Sup = 1, user = 1, no ex = 0 */ #define pmapVMhost 0x00000010 /* pmap with Virtual Machines attached to it */ #define pmapVMgsaa 0x00000020 /* Guest shadow assist active */ +#define pmapNXdisabled 0x00000040 /* no-execute disabled for this pmap */ unsigned int spaceNum; /* Space number */ unsigned int pmapCCtl; /* Cache control */ #define pmapCCtlVal 0xFFFF0000 /* Valid entries */ @@ -276,13 +277,12 @@ extern pmapTransTab *pmapTrans; /* Space to pmap translate table */ /* * prototypes. 
*/ -extern vm_offset_t phystokv(vm_offset_t pa); /* Get kernel virtual address from physical */ -extern vm_offset_t kvtophys(vm_offset_t va); /* Get physical address from kernel virtual */ -extern vm_map_offset_t kvtophys64(vm_map_offset_t va); /* Get 64-bit physical address from kernel virtual */ +extern addr64_t kvtophys(vm_offset_t va); /* Get physical address from kernel virtual */ extern vm_offset_t pmap_map(vm_offset_t va, vm_offset_t spa, vm_offset_t epa, - vm_prot_t prot); + vm_prot_t prot, + unsigned int flags); extern kern_return_t pmap_add_physical_memory(vm_offset_t spa, vm_offset_t epa, boolean_t available, @@ -320,6 +320,10 @@ extern int pmap_list_resident_pages( extern void pmap_init_sharedpage(vm_offset_t cpg); extern void pmap_map_sharedpage(task_t task, pmap_t pmap); extern void pmap_unmap_sharedpage(pmap_t pmap); +extern void pmap_disable_NX(pmap_t pmap); +/* Not required for ppc: */ +static inline void pmap_set_4GB_pagezero(__unused pmap_t pmap) {} +static inline void pmap_clear_4GB_pagezero(__unused pmap_t pmap) {} diff --git a/osfmk/ppc/pms.h b/osfmk/ppc/pms.h index 799b9a462..d978becea 100644 --- a/osfmk/ppc/pms.h +++ b/osfmk/ppc/pms.h @@ -24,144 +24,6 @@ #ifndef _PPC_PMS_H_ #define _PPC_PMS_H_ - -#define pmsMaxStates 64 -#define HalfwayToForever 0x7FFFFFFFFFFFFFFFULL -#define century 790560000000000ULL - -typedef void (*pmsSetFunc_t)(uint32_t, uint32_t, uint32_t); /* Function used to set hardware power state */ -typedef uint32_t (*pmsQueryFunc_t)(uint32_t, uint32_t); /* Function used to query hardware power state */ - -typedef struct pmsStat { - uint64_t stTime[2]; /* Total time until switch to next step */ - uint32_t stCnt[2]; /* Number of times switched to next step */ -} pmsStat; - -typedef struct pmsDef { - uint64_t pmsLimit; /* Max time in this state in microseconds */ - uint32_t pmsStepID; /* Unique ID for this step */ - uint32_t pmsSetCmd; /* Command to select power state */ -#define pmsCngXClk 0x80000000 /* Change external clock */ 
-#define pmsXUnk 0x7F /* External clock unknown */ -#define pmsXClk 0x7F000000 /* External clock frequency */ -#define pmsCngCPU 0x00800000 /* Change CPU parameters */ -#define pmsSync 0x00400000 /* Make changes synchronously, i.e., spin until delay finished */ -#define pmsMustCmp 0x00200000 /* Delay must complete before next change */ -#define pmsCPU 0x001F0000 /* CPU frequency */ -#define pmsCPUUnk 0x1F /* CPU frequency unknown */ -#define pmsCngVolt 0x00008000 /* Change voltage */ -#define pmsVoltage 0x00007F00 /* Voltage */ -#define pmsVoltUnk 0x7F /* Voltage unknown */ -#define pmsPowerID 0x000000FF /* Identify power state to HW */ - -/* Special commands - various things */ -#define pmsDelay 0xFFFFFFFD /* Delayed step, no processor or platform changes. Timer expiration causes transition to pmsTDelay */ -#define pmsParkIt 0xFFFFFFFF /* Enters the parked state. No processor or platform changes. Timers cancelled */ -#define pmsCInit ((pmsXUnk << 24) | (pmsCPUUnk << 16) | (pmsVoltUnk << 8)) /* Initial current set command value */ -/* Note: pmsSetFuncInd is an index into a table of function pointers and pmsSetFunc is the address - * of a function. Initially, when you create a step table, this field is set as an index into - * a table of function addresses that gets passed as a parameter to pmsBuild. When pmsBuild - * internalizes the step and function tables, it converts the index to the function address. 
- */ - union sf { - pmsSetFunc_t pmsSetFunc; /* Function used to set platform power state */ - uint32_t pmsSetFuncInd; /* Index to function in function table */ - } sf; - - uint32_t pmsDown; /* Next state if going lower */ - uint32_t pmsNext; /* Normal next state */ - uint32_t pmsTDelay; /* State if command was pmsDelay and timer expired */ -} pmsDef; - -typedef struct pmsCtl { - pmsStat (*pmsStats)[pmsMaxStates]; /* Pointer to statistics information, 0 if not enabled */ - pmsDef *pmsDefs[pmsMaxStates]; /* Indexed pointers to steps */ -} pmsCtl; - -/* - * Note that this block is in the middle of the per_proc and the size (32 bytes) - * can't be changed without moving it. - */ - -typedef struct pmsd { - uint32_t pmsState; /* Current power management state */ - uint32_t pmsCSetCmd; /* Current select command */ - uint64_t pmsPop; /* Time of next step */ - uint64_t pmsStamp; /* Time of transition to current state */ - uint64_t pmsTime; /* Total time in this state */ -} pmsd; - -/* - * Required power management step programs - */ - -enum { - pmsIdle = 0, /* Power state in idle loop */ - pmsNorm = 1, /* Normal step - usually low power */ - pmsNormHigh = 2, /* Highest power in normal step */ - pmsBoost = 3, /* Boost/overdrive step */ - pmsLow = 4, /* Lowest non-idle power state, no transitions */ - pmsHigh = 5, /* Power step for full on, no transitions */ - pmsPrepCng = 6, /* Prepare for step table change */ - pmsPrepSleep = 7, /* Prepare for sleep */ - pmsOverTemp = 8, /* Machine is too hot */ - pmsEnterNorm = 9, /* Enter into the normal step program */ - pmsFree = 10, /* First available empty step */ - pmsStartUp = 0xFFFFFFFE, /* Start stepping */ - pmsParked = 0xFFFFFFFF /* Power parked - used when changing stepping table */ -}; - -/* - * Power Management Stepper Control requests - */ - -enum { - pmsCPark = 0, /* Parks the stepper */ - pmsCStart = 1, /* Starts normal steppping */ - pmsCFLow = 2, /* Forces low power */ - pmsCFHigh = 3, /* Forces high power */ - pmsCCnfg 
= 4, /* Loads new stepper program */ - pmsCQuery = 5, /* Query current step and state */ - pmsCExperimental = 6, /* Enter experimental mode */ - pmsCFree = 7 /* Next control command to be assigned */ -}; - -extern pmsCtl pmsCtls; /* Power Management Stepper control */ -extern uint32_t pmsCtlp; -extern uint32_t pmsBroadcastWait; /* Number of outstanding broadcasts */ -extern pmsDef pmsDefault[]; -extern int pmsInstalled; -extern int pmsExperimental; - -#define pmsSetFuncMax 32 -extern pmsSetFunc_t pmsFuncTab[pmsSetFuncMax]; -extern pmsQueryFunc_t pmsQueryFunc; -extern uint32_t pmsPlatformData; - -extern int pmsCntrl(struct savearea *save); -extern void pmsInit(void); -extern void pmsStep(int timer); -extern void pmsDown(void); -extern void pmsSetStep(uint32_t nstep, int dir); -extern void pmsRemote(uint32_t nstep); -extern void pmsCPUSet(uint32_t sel); -extern uint32_t pmsCPUquery(void); -extern void pmsCPUConf(void); -extern void pmsCPUInit(void); - -#ifdef __cplusplus -extern "C" { -#endif - -extern kern_return_t pmsBuild(pmsDef *pd, uint32_t pdsize, pmsSetFunc_t *functab, uint32_t platformData, pmsQueryFunc_t queryFunc); -extern void pmsRun(uint32_t nstep); -extern void pmsRunLocal(uint32_t nstep); -extern void pmsPark(void); -extern void pmsStart(void); - -#ifdef __cplusplus -} -#endif - +#include #endif /* _PPC_PMS_H_ */ #endif /* KERNEL_PRIVATE */ diff --git a/osfmk/ppc/pmsCPU.c b/osfmk/ppc/pmsCPU.c index 3350292f0..86d179fed 100644 --- a/osfmk/ppc/pmsCPU.c +++ b/osfmk/ppc/pmsCPU.c @@ -27,12 +27,15 @@ #include #include #include -#include +#include #include #include #include +static void pmsCPURemote(uint32_t nstep); + + pmsDef pmsDefault[] = { { .pmsLimit = century, /* We can normally stay here for 100 years */ @@ -180,6 +183,15 @@ void pmsCPUConf(void) { return; } +/* + * Machine-dependent initialization + */ +void +pmsCPUMachineInit(void) +{ + return; +} + /* * This function should be called once for each processor to force the * processor to the correct 
voltage and frequency. @@ -201,7 +213,7 @@ void pmsCPUInit(void) { return; } -uint32_t pmsCPUquery(void) { +uint32_t pmsCPUQuery(void) { uint32_t result; struct per_proc_info *pp; @@ -230,4 +242,67 @@ uint32_t pmsCPUquery(void) { return result; } +/* + * These are not implemented for PPC. + */ +void pmsCPUYellowFlag(void) { +} + +void pmsCPUGreenFlag(void) { +} + +uint32_t pmsCPUPackageQuery(void) +{ + /* multi-core CPUs are not supported. */ + return(~(uint32_t)0); +} + +/* + * Broadcast a change to all processors including ourselves. + * This must transition before broadcasting because we may block and end up on a different processor. + * + * This will block until all processors have transitioned, so + * obviously, this can block. + * + * Called with interruptions disabled. + * + */ + +void pmsCPURun(uint32_t nstep) { + + pmsRunLocal(nstep); /* If we aren't parking (we are already parked), transition ourselves */ + (void)cpu_broadcast(&pmsBroadcastWait, pmsCPURemote, nstep); /* Tell everyone else to do it too */ + + return; + +} + +/* + * Receive a broadcast and react. + * This is called from the interprocessor signal handler. + * We wake up the initiator after we are finished. + * + */ + +static void pmsCPURemote(uint32_t nstep) { + + pmsRunLocal(nstep); /* Go set the step */ + if(!hw_atomic_sub(&pmsBroadcastWait, 1)) { /* Drop the wait count */ + thread_wakeup((event_t)&pmsBroadcastWait); /* If we were the last, wake up the signaller */ + } + return; +} + +/* + * Control the Power Management Stepper. + * Called from user state by the superuser via a ppc system call. + * Interruptions disabled. 
+ * + */ +int pmsCntrl(struct savearea *save) { + save->save_r3 = pmsControl(save->save_r3, (user_addr_t)(uintptr_t)save->save_r4, save->save_r5); + return 1; +} + + diff --git a/osfmk/ppc/ppc_init.c b/osfmk/ppc/ppc_init.c index f1dfa33b4..2af1e82df 100644 --- a/osfmk/ppc/ppc_init.c +++ b/osfmk/ppc/ppc_init.c @@ -48,7 +48,7 @@ #include #include #include -#include +#include #include #include diff --git a/osfmk/ppc/ppc_vm_init.c b/osfmk/ppc/ppc_vm_init.c index cca618d4f..18b9ce46f 100644 --- a/osfmk/ppc/ppc_vm_init.c +++ b/osfmk/ppc/ppc_vm_init.c @@ -38,6 +38,7 @@ #include #include #include +#include #include #include @@ -59,7 +60,6 @@ unsigned int hash_table_size; /* Hash table size */ int hash_table_shift; /* "ht_shift" boot arg, used to scale hash_table_size */ vm_offset_t taproot_addr; /* (BRINGUP) */ unsigned int taproot_size; /* (BRINGUP) */ -unsigned int serialmode; /* Serial mode keyboard and console control */ extern int disableConsoleOutput; struct shadowBAT shadow_BAT; @@ -234,13 +234,13 @@ void ppc_vm_init(uint64_t mem_limit, boot_args *args) pmap_bootstrap(max_mem, &first_avail, kmapsize); pmap_map(trunc_page(exception_entry), trunc_page(exception_entry), - round_page(exception_end), VM_PROT_READ|VM_PROT_EXECUTE); + round_page(exception_end), VM_PROT_READ|VM_PROT_EXECUTE, VM_WIMG_USE_DEFAULT); pmap_map(trunc_page(sectTEXTB), trunc_page(sectTEXTB), - round_page(sectTEXTB+sectSizeTEXT), VM_PROT_READ|VM_PROT_EXECUTE); + round_page(sectTEXTB+sectSizeTEXT), VM_PROT_READ|VM_PROT_EXECUTE, VM_WIMG_USE_DEFAULT); pmap_map(trunc_page(sectDATAB), trunc_page(sectDATAB), - round_page(sectDATAB+sectSizeDATA), VM_PROT_READ|VM_PROT_WRITE); + round_page(sectDATAB+sectSizeDATA), VM_PROT_READ|VM_PROT_WRITE, VM_WIMG_USE_DEFAULT); /* The KLD and LINKEDIT segments are unloaded in toto after boot completes, * but via ml_static_mfree(), through IODTFreeLoaderInfo(). 
Hence, we have @@ -252,7 +252,7 @@ void ppc_vm_init(uint64_t mem_limit, boot_args *args) addr += PAGE_SIZE) { pmap_enter(kernel_pmap, (vm_map_offset_t)addr, (ppnum_t)(addr>>12), - VM_PROT_READ|VM_PROT_WRITE, + VM_PROT_READ|VM_PROT_WRITE|VM_PROT_EXECUTE, VM_WIMG_USE_DEFAULT, TRUE); } @@ -262,7 +262,7 @@ void ppc_vm_init(uint64_t mem_limit, boot_args *args) addr += PAGE_SIZE) { pmap_enter(kernel_pmap, (vm_map_offset_t)addr, (ppnum_t)(addr>>12), - VM_PROT_READ|VM_PROT_WRITE, + VM_PROT_READ|VM_PROT_WRITE|VM_PROT_EXECUTE, VM_WIMG_USE_DEFAULT, TRUE); } @@ -273,7 +273,7 @@ void ppc_vm_init(uint64_t mem_limit, boot_args *args) pmap_enter(kernel_pmap, (vm_map_offset_t)addr, (ppnum_t)(addr>>12), - VM_PROT_READ|VM_PROT_WRITE, + VM_PROT_READ|VM_PROT_WRITE|VM_PROT_EXECUTE, VM_WIMG_USE_DEFAULT, TRUE); } @@ -283,7 +283,7 @@ void ppc_vm_init(uint64_t mem_limit, boot_args *args) addr += PAGE_SIZE) { pmap_enter(kernel_pmap, (vm_map_offset_t)addr, (ppnum_t)(addr>>12), - VM_PROT_READ|VM_PROT_WRITE, + VM_PROT_READ|VM_PROT_WRITE|VM_PROT_EXECUTE, VM_WIMG_USE_DEFAULT, TRUE); } @@ -306,7 +306,7 @@ void ppc_vm_init(uint64_t mem_limit, boot_args *args) for(addr = trunc_page(end); addr < round_page(static_memory_end); addr += PAGE_SIZE) { pmap_enter(kernel_pmap, (vm_map_address_t)addr, (ppnum_t)addr>>12, - VM_PROT_READ|VM_PROT_WRITE, + VM_PROT_READ|VM_PROT_WRITE|VM_PROT_EXECUTE, VM_WIMG_USE_DEFAULT, TRUE); } diff --git a/osfmk/ppc/rtclock.c b/osfmk/ppc/rtclock.c index ecd5ee24f..6f0e68c40 100644 --- a/osfmk/ppc/rtclock.c +++ b/osfmk/ppc/rtclock.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2004-2005 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved. 
* * @APPLE_LICENSE_HEADER_START@ * @@ -35,11 +35,10 @@ #include #include +#include #include #include -#include - #include #include #include @@ -47,83 +46,13 @@ #include #include -#include - #include -int sysclk_config(void); - -int sysclk_init(void); - -kern_return_t sysclk_gettime( - mach_timespec_t *cur_time); - -kern_return_t sysclk_getattr( - clock_flavor_t flavor, - clock_attr_t attr, - mach_msg_type_number_t *count); - -void sysclk_setalarm( - mach_timespec_t *deadline); - -struct clock_ops sysclk_ops = { - sysclk_config, sysclk_init, - sysclk_gettime, 0, - sysclk_getattr, 0, - sysclk_setalarm, -}; - -int calend_config(void); - -kern_return_t calend_gettime( - mach_timespec_t *cur_time); - -kern_return_t calend_getattr( - clock_flavor_t flavor, - clock_attr_t attr, - mach_msg_type_number_t *count); - -struct clock_ops calend_ops = { - calend_config, 0, - calend_gettime, 0, - calend_getattr, 0, - 0, -}; - -/* local data declarations */ +int rtclock_config(void); -static struct rtclock_calend { - uint32_t epoch; - uint32_t microepoch; - - uint64_t epoch1; - - int64_t adjtotal; - int32_t adjdelta; -} rtclock_calend; - -static uint32_t rtclock_boottime; - -#define TIME_ADD(rsecs, secs, rfrac, frac, unit) \ -MACRO_BEGIN \ - if (((rfrac) += (frac)) >= (unit)) { \ - (rfrac) -= (unit); \ - (rsecs) += 1; \ - } \ - (rsecs) += (secs); \ -MACRO_END - -#define TIME_SUB(rsecs, secs, rfrac, frac, unit) \ -MACRO_BEGIN \ - if ((int32_t)((rfrac) -= (frac)) < 0) { \ - (rfrac) += (unit); \ - (rsecs) -= 1; \ - } \ - (rsecs) -= (secs); \ -MACRO_END +int rtclock_init(void); #define NSEC_PER_HZ (NSEC_PER_SEC / 100) -static uint32_t rtclock_tick_interval; static uint32_t rtclock_sec_divisor; @@ -131,20 +60,8 @@ static mach_timebase_info_data_t rtclock_timebase_const; static boolean_t rtclock_timebase_initialized; -static clock_timer_func_t rtclock_timer_expire; - -static timer_call_data_t rtclock_alarm_timer; - -static void nanotime_to_absolutetime( - uint32_t secs, - uint32_t 
nanosecs, - uint64_t *result); - -static void rtclock_alarm_expire( - timer_call_param_t p0, - timer_call_param_t p1); - -/* global data declarations */ +/* XXX this should really be in a header somewhere */ +extern clock_timer_func_t rtclock_timer_expire; decl_simple_lock_data(static,rtclock_lock) @@ -181,7 +98,7 @@ timebase_callback( LOCK_RTC(s); if (!rtclock_timebase_initialized) { - commpage_set_timestamp(0,0,0,0); + commpage_set_timestamp(0,0,0); rtclock_timebase_const.numer = numer; rtclock_timebase_const.denom = denom; @@ -205,13 +122,11 @@ timebase_callback( } /* - * Configure the real-time clock device. + * Configure the system clock device. */ int -sysclk_config(void) +rtclock_config(void) { - timer_call_setup(&rtclock_alarm_timer, rtclock_alarm_expire, NULL); - simple_lock_init(&rtclock_lock, 0); PE_register_timebase_callback(timebase_callback); @@ -223,7 +138,7 @@ sysclk_config(void) * Initialize the system clock device. */ int -sysclk_init(void) +rtclock_init(void) { uint64_t abstime; struct per_proc_info *pp; @@ -231,30 +146,13 @@ sysclk_init(void) pp = getPerProc(); abstime = mach_absolute_time(); - pp->rtclock_tick_deadline = abstime + rtclock_tick_interval; /* Get the time we need to pop */ - pp->rtcPop = pp->rtclock_tick_deadline; /* Set the rtc pop time the same for now */ + pp->rtclock_intr_deadline = abstime + rtclock_tick_interval; /* Get the time we need to pop */ - (void)setTimerReq(); /* Start the timers going */ + etimer_resync_deadlines(); /* Start the timers going */ return (1); } -kern_return_t -sysclk_gettime( - mach_timespec_t *time) /* OUT */ -{ - uint64_t now, t64; - uint32_t divisor; - - now = mach_absolute_time(); - - time->tv_sec = t64 = now / (divisor = rtclock_sec_divisor); - now -= (t64 * divisor); - time->tv_nsec = (now * NSEC_PER_SEC) / divisor; - - return (KERN_SUCCESS); -} - void clock_get_system_microtime( uint32_t *secs, @@ -285,590 +183,29 @@ clock_get_system_nanotime( *nanosecs = (now * NSEC_PER_SEC) / divisor; } -/* - 
* Get clock device attributes. - */ -kern_return_t -sysclk_getattr( - clock_flavor_t flavor, - clock_attr_t attr, /* OUT */ - mach_msg_type_number_t *count) /* IN/OUT */ -{ - spl_t s; - - if (*count != 1) - return (KERN_FAILURE); - - switch (flavor) { - - case CLOCK_GET_TIME_RES: /* >0 res */ - case CLOCK_ALARM_CURRES: /* =0 no alarm */ - case CLOCK_ALARM_MINRES: - case CLOCK_ALARM_MAXRES: - LOCK_RTC(s); - *(clock_res_t *) attr = NSEC_PER_HZ; - UNLOCK_RTC(s); - break; - - default: - return (KERN_INVALID_VALUE); - } - - return (KERN_SUCCESS); -} - -/* - * Set deadline for the next alarm on the clock device. This call - * always resets the time to deliver an alarm for the clock. - */ -void -sysclk_setalarm( - mach_timespec_t *deadline) -{ - uint64_t abstime; - - nanotime_to_absolutetime(deadline->tv_sec, deadline->tv_nsec, &abstime); - timer_call_enter(&rtclock_alarm_timer, abstime); -} - -/* - * Configure the calendar clock. - */ -int -calend_config(void) -{ - return (1); -} - -/* - * Get the current clock time. - */ -kern_return_t -calend_gettime( - mach_timespec_t *time) /* OUT */ -{ - clock_get_calendar_nanotime( - &time->tv_sec, &time->tv_nsec); - - return (KERN_SUCCESS); -} - -/* - * Get clock device attributes. 
- */ -kern_return_t -calend_getattr( - clock_flavor_t flavor, - clock_attr_t attr, /* OUT */ - mach_msg_type_number_t *count) /* IN/OUT */ -{ - spl_t s; - - if (*count != 1) - return (KERN_FAILURE); - - switch (flavor) { - - case CLOCK_GET_TIME_RES: /* >0 res */ - LOCK_RTC(s); - *(clock_res_t *) attr = NSEC_PER_HZ; - UNLOCK_RTC(s); - break; - - case CLOCK_ALARM_CURRES: /* =0 no alarm */ - case CLOCK_ALARM_MINRES: - case CLOCK_ALARM_MAXRES: - *(clock_res_t *) attr = 0; - break; - - default: - return (KERN_INVALID_VALUE); - } - - return (KERN_SUCCESS); -} - void -clock_get_calendar_microtime( - uint32_t *secs, - uint32_t *microsecs) +clock_gettimeofday_set_commpage( + uint64_t abstime, + uint64_t epoch, + uint64_t offset, + uint32_t *secs, + uint32_t *microsecs) { - uint32_t epoch, microepoch; - uint64_t now, t64; - spl_t s = splclock(); + uint64_t t64, now = abstime; simple_lock(&rtclock_lock); - if (rtclock_calend.adjdelta >= 0) { - uint32_t divisor; - - now = mach_absolute_time(); - - epoch = rtclock_calend.epoch; - microepoch = rtclock_calend.microepoch; + now += offset; - simple_unlock(&rtclock_lock); + *secs = t64 = now / rtclock_sec_divisor; + now -= (t64 * rtclock_sec_divisor); + *microsecs = (now * USEC_PER_SEC) / rtclock_sec_divisor; - *secs = t64 = now / (divisor = rtclock_sec_divisor); - now -= (t64 * divisor); - *microsecs = (now * USEC_PER_SEC) / divisor; + *secs += epoch; - TIME_ADD(*secs, epoch, *microsecs, microepoch, USEC_PER_SEC); - } - else { - uint32_t delta, t32; - - delta = -rtclock_calend.adjdelta; - - now = mach_absolute_time(); - - *secs = rtclock_calend.epoch; - *microsecs = rtclock_calend.microepoch; - - if (now > rtclock_calend.epoch1) { - t64 = now - rtclock_calend.epoch1; - - t32 = (t64 * USEC_PER_SEC) / rtclock_sec_divisor; - - if (t32 > delta) - TIME_ADD(*secs, 0, *microsecs, (t32 - delta), USEC_PER_SEC); - } - - simple_unlock(&rtclock_lock); - } - - splx(s); -} - -/* This is only called from the gettimeofday() syscall. 
As a side - * effect, it updates the commpage timestamp. Otherwise it is - * identical to clock_get_calendar_microtime(). Because most - * gettimeofday() calls are handled by the commpage in user mode, - * this routine should be infrequently used except when slowing down - * the clock. - */ -void -clock_gettimeofday( - uint32_t *secs_p, - uint32_t *microsecs_p) -{ - uint32_t epoch, microepoch; - uint32_t secs, microsecs; - uint64_t now, t64, secs_64, usec_64; - spl_t s = splclock(); - - simple_lock(&rtclock_lock); - - if (rtclock_calend.adjdelta >= 0) { - now = mach_absolute_time(); - - epoch = rtclock_calend.epoch; - microepoch = rtclock_calend.microepoch; - - secs = secs_64 = now / rtclock_sec_divisor; - t64 = now - (secs_64 * rtclock_sec_divisor); - microsecs = usec_64 = (t64 * USEC_PER_SEC) / rtclock_sec_divisor; - - TIME_ADD(secs, epoch, microsecs, microepoch, USEC_PER_SEC); - - /* adjust "now" to be absolute time at _start_ of usecond */ - now -= t64 - ((usec_64 * rtclock_sec_divisor) / USEC_PER_SEC); - - commpage_set_timestamp(now,secs,microsecs,rtclock_sec_divisor); - } - else { - uint32_t delta, t32; - - delta = -rtclock_calend.adjdelta; - - now = mach_absolute_time(); - - secs = rtclock_calend.epoch; - microsecs = rtclock_calend.microepoch; - - if (now > rtclock_calend.epoch1) { - t64 = now - rtclock_calend.epoch1; - - t32 = (t64 * USEC_PER_SEC) / rtclock_sec_divisor; - - if (t32 > delta) - TIME_ADD(secs, 0, microsecs, (t32 - delta), USEC_PER_SEC); - } - - /* no need to disable timestamp, it is already off */ - } - - simple_unlock(&rtclock_lock); - splx(s); - - *secs_p = secs; - *microsecs_p = microsecs; -} - -void -clock_get_calendar_nanotime( - uint32_t *secs, - uint32_t *nanosecs) -{ - uint32_t epoch, nanoepoch; - uint64_t now, t64; - spl_t s = splclock(); - - simple_lock(&rtclock_lock); - - if (rtclock_calend.adjdelta >= 0) { - uint32_t divisor; - - now = mach_absolute_time(); - - epoch = rtclock_calend.epoch; - nanoepoch = rtclock_calend.microepoch * 
NSEC_PER_USEC; - - simple_unlock(&rtclock_lock); - - *secs = t64 = now / (divisor = rtclock_sec_divisor); - now -= (t64 * divisor); - *nanosecs = ((now * USEC_PER_SEC) / divisor) * NSEC_PER_USEC; - - TIME_ADD(*secs, epoch, *nanosecs, nanoepoch, NSEC_PER_SEC); - } - else { - uint32_t delta, t32; - - delta = -rtclock_calend.adjdelta; - - now = mach_absolute_time(); - - *secs = rtclock_calend.epoch; - *nanosecs = rtclock_calend.microepoch * NSEC_PER_USEC; - - if (now > rtclock_calend.epoch1) { - t64 = now - rtclock_calend.epoch1; - - t32 = (t64 * USEC_PER_SEC) / rtclock_sec_divisor; - - if (t32 > delta) - TIME_ADD(*secs, 0, *nanosecs, ((t32 - delta) * NSEC_PER_USEC), NSEC_PER_SEC); - } - - simple_unlock(&rtclock_lock); - } - - splx(s); -} - -void -clock_set_calendar_microtime( - uint32_t secs, - uint32_t microsecs) -{ - uint32_t sys, microsys; - uint32_t newsecs; - spl_t s; - - newsecs = (microsecs < 500*USEC_PER_SEC)? - secs: secs + 1; - - s = splclock(); - simple_lock(&rtclock_lock); - - commpage_set_timestamp(0,0,0,0); - - /* - * Cancel any adjustment in progress. 
- */ - if (rtclock_calend.adjdelta < 0) { - uint64_t now, t64; - uint32_t delta, t32; - - delta = -rtclock_calend.adjdelta; - - sys = rtclock_calend.epoch; - microsys = rtclock_calend.microepoch; - - now = mach_absolute_time(); - - if (now > rtclock_calend.epoch1) - t64 = now - rtclock_calend.epoch1; - else - t64 = 0; - - t32 = (t64 * USEC_PER_SEC) / rtclock_sec_divisor; - - if (t32 > delta) - TIME_ADD(sys, 0, microsys, (t32 - delta), USEC_PER_SEC); - - rtclock_calend.epoch = sys; - rtclock_calend.microepoch = microsys; - - sys = t64 = now / rtclock_sec_divisor; - now -= (t64 * rtclock_sec_divisor); - microsys = (now * USEC_PER_SEC) / rtclock_sec_divisor; - - TIME_SUB(rtclock_calend.epoch, sys, rtclock_calend.microepoch, microsys, USEC_PER_SEC); - } - - rtclock_calend.epoch1 = 0; - rtclock_calend.adjdelta = rtclock_calend.adjtotal = 0; - - /* - * Calculate the new calendar epoch based on - * the new value and the system clock. - */ - clock_get_system_microtime(&sys, &microsys); - TIME_SUB(secs, sys, microsecs, microsys, USEC_PER_SEC); - - /* - * Adjust the boottime based on the delta. - */ - rtclock_boottime += secs - rtclock_calend.epoch; - - /* - * Set the new calendar epoch. - */ - rtclock_calend.epoch = secs; - rtclock_calend.microepoch = microsecs; simple_unlock(&rtclock_lock); - - /* - * Set the new value for the platform clock. - */ - PESetGMTTimeOfDay(newsecs); - - splx(s); - - /* - * Send host notifications.
- */ - host_notify_calendar_change(); -} - -#define tickadj (40) /* "standard" skew, us / tick */ -#define bigadj (USEC_PER_SEC) /* use 10x skew above bigadj us */ - -uint32_t -clock_set_calendar_adjtime( - int32_t *secs, - int32_t *microsecs) -{ - int64_t total, ototal; - uint32_t interval = 0; - spl_t s; - - total = (int64_t)*secs * USEC_PER_SEC + *microsecs; - - LOCK_RTC(s); - commpage_set_timestamp(0,0,0,0); - - ototal = rtclock_calend.adjtotal; - - if (rtclock_calend.adjdelta < 0) { - uint64_t now, t64; - uint32_t delta, t32; - uint32_t sys, microsys; - - delta = -rtclock_calend.adjdelta; - - sys = rtclock_calend.epoch; - microsys = rtclock_calend.microepoch; - - now = mach_absolute_time(); - - if (now > rtclock_calend.epoch1) - t64 = now - rtclock_calend.epoch1; - else - t64 = 0; - - t32 = (t64 * USEC_PER_SEC) / rtclock_sec_divisor; - - if (t32 > delta) - TIME_ADD(sys, 0, microsys, (t32 - delta), USEC_PER_SEC); - - rtclock_calend.epoch = sys; - rtclock_calend.microepoch = microsys; - - sys = t64 = now / rtclock_sec_divisor; - now -= (t64 * rtclock_sec_divisor); - microsys = (now * USEC_PER_SEC) / rtclock_sec_divisor; - - TIME_SUB(rtclock_calend.epoch, sys, rtclock_calend.microepoch, microsys, USEC_PER_SEC); - } - - if (total != 0) { - int32_t delta = tickadj; - - if (total > 0) { - if (total > bigadj) - delta *= 10; - if (delta > total) - delta = total; - - rtclock_calend.epoch1 = 0; - } - else { - uint64_t now, t64; - uint32_t sys, microsys; - - if (total < -bigadj) - delta *= 10; - delta = -delta; - if (delta < total) - delta = total; - - rtclock_calend.epoch1 = now = mach_absolute_time(); - - sys = t64 = now / rtclock_sec_divisor; - now -= (t64 * rtclock_sec_divisor); - microsys = (now * USEC_PER_SEC) / rtclock_sec_divisor; - - TIME_ADD(rtclock_calend.epoch, sys, rtclock_calend.microepoch, microsys, USEC_PER_SEC); - } - - rtclock_calend.adjtotal = total; - rtclock_calend.adjdelta = delta; - - interval = rtclock_tick_interval; - } - else { - 
rtclock_calend.epoch1 = 0; - rtclock_calend.adjdelta = rtclock_calend.adjtotal = 0; - } - - UNLOCK_RTC(s); - - if (ototal == 0) - *secs = *microsecs = 0; - else { - *secs = ototal / USEC_PER_SEC; - *microsecs = ototal % USEC_PER_SEC; - } - - return (interval); -} - -uint32_t -clock_adjust_calendar(void) -{ - uint32_t interval = 0; - int32_t delta; - spl_t s; - - LOCK_RTC(s); - commpage_set_timestamp(0,0,0,0); - - delta = rtclock_calend.adjdelta; - - if (delta > 0) { - TIME_ADD(rtclock_calend.epoch, 0, rtclock_calend.microepoch, delta, USEC_PER_SEC); - - rtclock_calend.adjtotal -= delta; - if (delta > rtclock_calend.adjtotal) - rtclock_calend.adjdelta = rtclock_calend.adjtotal; - } - else - if (delta < 0) { - uint64_t now, t64; - uint32_t t32; - - now = mach_absolute_time(); - - if (now > rtclock_calend.epoch1) - t64 = now - rtclock_calend.epoch1; - else - t64 = 0; - - rtclock_calend.epoch1 = now; - - t32 = (t64 * USEC_PER_SEC) / rtclock_sec_divisor; - - TIME_ADD(rtclock_calend.epoch, 0, rtclock_calend.microepoch, (t32 + delta), USEC_PER_SEC); - - rtclock_calend.adjtotal -= delta; - if (delta < rtclock_calend.adjtotal) - rtclock_calend.adjdelta = rtclock_calend.adjtotal; - - if (rtclock_calend.adjdelta == 0) { - uint32_t sys, microsys; - - sys = t64 = now / rtclock_sec_divisor; - now -= (t64 * rtclock_sec_divisor); - microsys = (now * USEC_PER_SEC) / rtclock_sec_divisor; - - TIME_SUB(rtclock_calend.epoch, sys, rtclock_calend.microepoch, microsys, USEC_PER_SEC); - - rtclock_calend.epoch1 = 0; - } - } - - if (rtclock_calend.adjdelta != 0) - interval = rtclock_tick_interval; - - UNLOCK_RTC(s); - - return (interval); -} - -/* - * clock_initialize_calendar: - * - * Set the calendar and related clocks - * from the platform clock at boot or - * wake event. 
- */ -void -clock_initialize_calendar(void) -{ - uint32_t sys, microsys; - uint32_t microsecs = 0, secs = PEGetGMTTimeOfDay(); - spl_t s; - - LOCK_RTC(s); - commpage_set_timestamp(0,0,0,0); - - if ((int32_t)secs >= (int32_t)rtclock_boottime) { - /* - * Initialize the boot time based on the platform clock. - */ - if (rtclock_boottime == 0) - rtclock_boottime = secs; - - /* - * Calculate the new calendar epoch based - * on the platform clock and the system - * clock. - */ - clock_get_system_microtime(&sys, &microsys); - TIME_SUB(secs, sys, microsecs, microsys, USEC_PER_SEC); - - /* - * Set the new calendar epoch. - */ - rtclock_calend.epoch = secs; - rtclock_calend.microepoch = microsecs; - - /* - * Cancel any adjustment in progress. - */ - rtclock_calend.epoch1 = 0; - rtclock_calend.adjdelta = rtclock_calend.adjtotal = 0; - } - - UNLOCK_RTC(s); - - /* - * Send host notifications. - */ - host_notify_calendar_change(); -} - -void -clock_get_boottime_nanotime( - uint32_t *secs, - uint32_t *nanosecs) -{ - *secs = rtclock_boottime; - *nanosecs = 0; } void @@ -878,37 +215,11 @@ clock_timebase_info( spl_t s; LOCK_RTC(s); - rtclock_timebase_initialized = TRUE; *info = rtclock_timebase_const; + rtclock_timebase_initialized = TRUE; UNLOCK_RTC(s); } -void -clock_set_timer_deadline( - uint64_t deadline) -{ - int decr; - uint64_t abstime; - rtclock_timer_t *mytimer; - struct per_proc_info *pp; - spl_t s; - - s = splclock(); - pp = getPerProc(); - mytimer = &pp->rtclock_timer; - mytimer->deadline = deadline; - - if (!mytimer->has_expired && (deadline < pp->rtclock_tick_deadline)) { /* Has the timer already expired or is less that set? 
*/ - pp->rtcPop = deadline; /* Yes, set the new rtc pop time */ - decr = setTimerReq(); /* Start the timers going */ - - KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_EXCP_DECI, 1) - | DBG_FUNC_NONE, decr, 2, 0, 0, 0); - } - - splx(s); -} - void clock_set_timer_func( clock_timer_func_t func) @@ -921,91 +232,20 @@ clock_set_timer_func( UNLOCK_RTC(s); } -/* - * Real-time clock device interrupt. - */ void -rtclock_intr(struct savearea *ssp) { - - uint64_t abstime; - int decr; - rtclock_timer_t *mytimer; - struct per_proc_info *pp; - - pp = getPerProc(); - mytimer = &pp->rtclock_timer; - - abstime = mach_absolute_time(); - if (pp->rtclock_tick_deadline <= abstime) { /* Have we passed the pop time? */ - clock_deadline_for_periodic_event(rtclock_tick_interval, abstime, - &pp->rtclock_tick_deadline); - hertz_tick(USER_MODE(ssp->save_srr1), ssp->save_srr0); - abstime = mach_absolute_time(); /* Refresh the current time since we went away */ - } - - if (mytimer->deadline <= abstime) { /* Have we expired the deadline? */ - mytimer->has_expired = TRUE; /* Remember that we popped */ - mytimer->deadline = EndOfAllTime; /* Set timer request to the end of all time in case we have no more events */ - (*rtclock_timer_expire)(abstime); /* Process pop */ - mytimer->has_expired = FALSE; - } - - pp->rtcPop = (pp->rtclock_tick_deadline < mytimer->deadline) ? /* Get shortest pop */ - pp->rtclock_tick_deadline : /* It was the periodic timer */ - mytimer->deadline; /* Actually, an event request */ - - decr = setTimerReq(); /* Request the timer pop */ - - KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_EXCP_DECI, 1) - | DBG_FUNC_NONE, decr, 3, 0, 0, 0); -} - -/* - * Request an interruption at a specific time - * - * Sets the decrementer to pop at the right time based on the timebase. - * The value is chosen by comparing the rtc request with the power management. - * request. We may add other values at a future time. 
- * - */ - -int setTimerReq(void) { - - struct per_proc_info *pp; - int decr; - uint64_t nexttime; - - pp = getPerProc(); /* Get per_proc */ - - nexttime = pp->rtcPop; /* Assume main timer */ - - decr = setPop((pp->pms.pmsPop < nexttime) ? pp->pms.pmsPop : nexttime); /* Schedule timer pop */ - - return decr; /* Pass back what we actually set */ -} - -static void -rtclock_alarm_expire( - __unused void *p0, - __unused void *p1) -{ - mach_timespec_t timestamp; - - (void) sysclk_gettime(&timestamp); - - clock_alarm_intr(SYSTEM_CLOCK, &timestamp); -} - -static void -nanotime_to_absolutetime( - uint32_t secs, - uint32_t nanosecs, +clock_interval_to_absolutetime_interval( + uint32_t interval, + uint32_t scale_factor, uint64_t *result) { - uint32_t divisor = rtclock_sec_divisor; + uint64_t nanosecs = (uint64_t)interval * scale_factor; + uint64_t t64; + uint32_t divisor; - *result = ((uint64_t)secs * divisor) + - ((uint64_t)nanosecs * divisor) / NSEC_PER_SEC; + *result = (t64 = nanosecs / NSEC_PER_SEC) * + (divisor = rtclock_sec_divisor); + nanosecs -= (t64 * NSEC_PER_SEC); + *result += (nanosecs * divisor) / NSEC_PER_SEC; } void @@ -1023,44 +263,29 @@ absolutetime_to_microtime( } void -clock_interval_to_deadline( - uint32_t interval, - uint32_t scale_factor, - uint64_t *result) -{ - uint64_t abstime; - - clock_get_uptime(result); - - clock_interval_to_absolutetime_interval(interval, scale_factor, &abstime); - - *result += abstime; -} - -void -clock_interval_to_absolutetime_interval( - uint32_t interval, - uint32_t scale_factor, - uint64_t *result) +absolutetime_to_nanotime( + uint64_t abstime, + uint32_t *secs, + uint32_t *nanosecs) { - uint64_t nanosecs = (uint64_t)interval * scale_factor; - uint64_t t64; - uint32_t divisor; + uint64_t t64; + uint32_t divisor; - *result = (t64 = nanosecs / NSEC_PER_SEC) * - (divisor = rtclock_sec_divisor); - nanosecs -= (t64 * NSEC_PER_SEC); - *result += (nanosecs * divisor) / NSEC_PER_SEC; + *secs = t64 = abstime / (divisor = rtclock_sec_divisor); 
+ abstime -= (t64 * divisor); + *nanosecs = (abstime * NSEC_PER_SEC) / divisor; } void -clock_absolutetime_interval_to_deadline( - uint64_t abstime, +nanotime_to_absolutetime( + uint32_t secs, + uint32_t nanosecs, uint64_t *result) { - clock_get_uptime(result); + uint32_t divisor = rtclock_sec_divisor; - *result += abstime; + *result = ((uint64_t)secs * divisor) + + ((uint64_t)nanosecs * divisor) / NSEC_PER_SEC; } void @@ -1100,4 +325,3 @@ machine_delay_until( now = mach_absolute_time(); } while (now < deadline); } - diff --git a/osfmk/ppc/rtclock.h b/osfmk/ppc/rtclock.h index 4c2800d7d..3e5c04262 100644 --- a/osfmk/ppc/rtclock.h +++ b/osfmk/ppc/rtclock.h @@ -34,10 +34,11 @@ #ifndef _PPC_RTCLOCK_H_ #define _PPC_RTCLOCK_H_ +#include + #define EndOfAllTime 0xFFFFFFFFFFFFFFFFULL extern void rtclock_intr(struct savearea *ssp); -extern int setTimerReq(void); #pragma pack(push,4) struct rtclock_timer_t { diff --git a/osfmk/ppc/serial_io.c b/osfmk/ppc/serial_io.c index 176ff88f3..f944d2b4a 100644 --- a/osfmk/ppc/serial_io.c +++ b/osfmk/ppc/serial_io.c @@ -101,8 +101,6 @@ extern unsigned int disableSerialOuput; int serial_initted = 0; unsigned int scc_parm_done = 0; -extern unsigned int serialmode; - static struct scc_byte { unsigned char reg; unsigned char val; @@ -399,6 +397,17 @@ scc_getc(int unit, int line, boolean_t wait, boolean_t raw) return c; } + +/* + * This front-ends scc_getc to make some intel changes easier + */ + +int _serial_getc(int unit, int line, boolean_t wait, boolean_t raw) { + + return(scc_getc(unit, line, wait, raw)); + +} + /* * Put a char on a specific SCC line * use splhigh since we might be doing a printf in high spl'd code @@ -647,55 +656,5 @@ scc_param(struct scc_tty *tp) } -/* - * This routine will start a thread that polls the serial port, listening for - * characters that have been typed. 
- */ - -void -serial_keyboard_init(void) -{ - kern_return_t result; - thread_t thread; - - if(!(serialmode & 2)) return; /* Leave if we do not want a serial console */ - - kprintf("Serial keyboard started\n"); - result = kernel_thread_start_priority((thread_continue_t)serial_keyboard_start, NULL, MAXPRI_KERNEL, &thread); - if (result != KERN_SUCCESS) - panic("serial_keyboard_init"); - - thread_deallocate(thread); -} - -void -serial_keyboard_start(void) -{ - serial_keyboard_poll(); /* Go see if there are any characters pending now */ - panic("serial_keyboard_start: we can't get back here\n"); -} - -static int ptestxxx = 0; - -void -serial_keyboard_poll(void) -{ - int chr; - uint64_t next; - extern void cons_cinput(char ch); /* The BSD routine that gets characters */ - - - while(1) { /* Do this for a while */ - chr = scc_getc(0, 1, 0, 1); /* Get a character if there is one */ - if(chr < 0) break; /* The serial buffer is empty */ - cons_cinput((char)chr); /* Buffer up the character */ - } - - clock_interval_to_deadline(16, 1000000, &next); /* Get time of pop */ - - assert_wait_deadline((event_t)serial_keyboard_poll, THREAD_UNINT, next); /* Show we are "waiting" */ - thread_block((thread_continue_t)serial_keyboard_poll); /* Wait for it */ - panic("serial_keyboard_poll: Shouldn't never ever get here...\n"); -} #endif /* NSCC > 0 */ diff --git a/osfmk/ppc/serial_io.h b/osfmk/ppc/serial_io.h index 0b3e4756b..6fe428724 100644 --- a/osfmk/ppc/serial_io.h +++ b/osfmk/ppc/serial_io.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -112,21 +112,6 @@ extern int scc_getc( boolean_t wait, boolean_t raw); -/* Functions in serial_console.c for switching between serial and video - consoles. 
*/ -extern boolean_t console_is_serial(void); -extern int switch_to_serial_console( - void); - -extern int switch_to_video_console( - void); - -extern void switch_to_old_console( - int old_console); - -void serial_keyboard_init(void); -void serial_keyboard_start(void); -void serial_keyboard_poll(void); /* diff --git a/osfmk/ppc/skiplists.s b/osfmk/ppc/skiplists.s index d5653260d..f0cb982ca 100644 --- a/osfmk/ppc/skiplists.s +++ b/osfmk/ppc/skiplists.s @@ -362,7 +362,7 @@ LEXT(mapSearchFull) ; r7 = current skip list number * 8 ; r8 = ptr to skip list vector of mapping pointed to by r9 ; r9 = prev ptr, ie highest mapping that comes before search target (initially the pmap) - ; r10 = lowest expected next va, 0 at the beginning of the search + ; r10 = lowest expected next va, 0 at the beginning of the search ; r12 = ptr to the skipListPrev vector in the per-proc .align 5 @@ -445,7 +445,7 @@ mapSrchFull64Found: ; WARNING: can drop down to here ; r7 = current skip list number * 8 ; r8 = ptr to skip list vector of mapping pointed to by r9 ; r9 = prev ptr, ie highest mapping that comes before search target (initially the pmap) - ; r10 = lowest expected next va, 0 at the beginning of the search + ; r10 = lowest expected next va, 0 at the beginning of the search ; r12 = ptr to the skipListPrev vector in the per-proc .align 4 @@ -474,7 +474,7 @@ mapSrchFull32b: la r8,mpList0+4(r3) ; point to skip list vector in this mapping mr r9,r3 ; current becomes previous lwzx r3,r7,r8 ; get ptr to next mapping in current list - addi r10,r4,0x1000 ; Get the lowest VA we can get next + addi r10,r4,0x1000 ; Get the lowest VA we can get next mapSrchFull32c: mr. 
r3,r3 ; next becomes current bne+ mapSrchFull32a ; was another, so loop diff --git a/osfmk/ppc/status.c b/osfmk/ppc/status.c index d481829fe..e3e975a8d 100644 --- a/osfmk/ppc/status.c +++ b/osfmk/ppc/status.c @@ -1324,6 +1324,19 @@ thread_adjuserstack(thread_t thread, int adjust) } +void +thread_setsinglestep(thread_t thread, int on) +{ + savearea *sv; + + sv = get_user_regs(thread); /* Get the user state registers */ + + if (on) + sv->save_srr1 |= MASK(MSR_SE); + else + sv->save_srr1 &= ~MASK(MSR_SE); +} + /* * thread_setentrypoint: * diff --git a/osfmk/ppc/trap.c b/osfmk/ppc/trap.c index 1dafff22a..774ab23da 100644 --- a/osfmk/ppc/trap.c +++ b/osfmk/ppc/trap.c @@ -82,6 +82,7 @@ extern int not_in_kdp; #define PROT_RO (VM_PROT_READ) #define PROT_RW (VM_PROT_READ|VM_PROT_WRITE) + /* A useful macro to update the ppc_exception_state in the PCB * before calling doexception */ @@ -128,11 +129,11 @@ struct savearea *trap(int trapno, myast = ast_pending(); if(perfASTHook) { - if(*myast & AST_PPC_CHUD_ALL) { + if(*myast & AST_CHUD_ALL) { perfASTHook(trapno, ssp, dsisr, (unsigned int)dar); } } else { - *myast &= ~AST_PPC_CHUD_ALL; + *myast &= ~AST_CHUD_ALL; } if(perfTrapHook) { /* Is there a hook? 
*/ @@ -369,7 +370,7 @@ struct savearea *trap(int trapno, map = kernel_map; code = vm_fault(map, vm_map_trunc_page(ssp->save_srr0), - PROT_EXEC, FALSE, THREAD_UNINT, NULL, vm_map_trunc_page(0)); + (PROT_EXEC | PROT_RO), FALSE, THREAD_UNINT, NULL, vm_map_trunc_page(0)); if (code != KERN_SUCCESS) { unresolved_kernel_trap(trapno, ssp, dsisr, dar, NULL); @@ -598,7 +599,7 @@ struct savearea *trap(int trapno, map = thread->map; code = vm_fault(map, vm_map_trunc_page(ssp->save_srr0), - PROT_EXEC, FALSE, THREAD_ABORTSAFE, NULL, vm_map_trunc_page(0)); + (PROT_EXEC | PROT_RO), FALSE, THREAD_ABORTSAFE, NULL, vm_map_trunc_page(0)); if ((code != KERN_SUCCESS) && (code != KERN_ABORTED)) { UPDATE_PPC_EXCEPTION_STATE; diff --git a/osfmk/ppc/vmachmon.c b/osfmk/ppc/vmachmon.c index c4583336b..3eeee08a0 100644 --- a/osfmk/ppc/vmachmon.c +++ b/osfmk/ppc/vmachmon.c @@ -616,7 +616,7 @@ int vmm_init_context(struct savearea *save) hw_atomic_add((int *)&saveanchor.savetarget, 2); /* Account for the number of extra saveareas we think we might "need" */ pmap_t hpmap = act->map->pmap; /* Get host pmap */ - pmap_t gpmap = pmap_create(0); /* Make a fresh guest pmap */ + pmap_t gpmap = pmap_create(0, FALSE); /* Make a fresh guest pmap */ if (gpmap) { /* Did we succeed ? */ CTable->vmmAdsp[cvi] = gpmap; /* Remember guest pmap for new context */ if (lowGlo.lgVMMforcedFeats & vmmGSA) { /* Forcing on guest shadow assist ? */ @@ -975,7 +975,7 @@ kern_return_t vmm_map_page( map = current_thread()->map; /* Get the host's map */ if (pmap->pmapFlags & pmapVMgsaa) { /* Guest shadow assist active ? 
*/ - ret = hw_res_map_gv(map->pmap, pmap, cva, ava, getProtPPC(prot)); + ret = hw_res_map_gv(map->pmap, pmap, cva, ava, getProtPPC(prot, TRUE)); /* Attempt to resume an existing gv->phys mapping */ if (mapRtOK != ret) { /* Nothing to resume, construct a new mapping */ @@ -1017,7 +1017,7 @@ kern_return_t vmm_map_page( if (pattr & mmFlgCInhib) wimg |= 0x4; if (pattr & mmFlgGuarded) wimg |= 0x1; unsigned int mflags = (pindex << 16) | mpGuest; - addr64_t gva = ((ava & ~mpHWFlags) | (wimg << 3) | getProtPPC(prot)); + addr64_t gva = ((ava & ~mpHWFlags) | (wimg << 3) | getProtPPC(prot, TRUE)); hw_add_map_gv(map->pmap, pmap, gva, mflags, mp->mpPAddr); /* Construct new guest->phys mapping */ diff --git a/osfmk/vm/bsd_vm.c b/osfmk/vm/bsd_vm.c index fa1cfa94b..ef9254714 100644 --- a/osfmk/vm/bsd_vm.c +++ b/osfmk/vm/bsd_vm.c @@ -43,7 +43,6 @@ #include #include -#include #include #include #include @@ -108,11 +107,22 @@ get_vm_end( * BSD VNODE PAGER */ -/* until component support available */ -int vnode_pager_workaround; +const struct memory_object_pager_ops vnode_pager_ops = { + vnode_pager_reference, + vnode_pager_deallocate, + vnode_pager_init, + vnode_pager_terminate, + vnode_pager_data_request, + vnode_pager_data_return, + vnode_pager_data_initialize, + vnode_pager_data_unlock, + vnode_pager_synchronize, + vnode_pager_unmap, + "vnode pager" +}; typedef struct vnode_pager { - int *pager; /* pager workaround pointer */ + memory_object_pager_ops_t pager_ops; /* == &vnode_pager_ops */ unsigned int pager_ikot; /* JMM: fake ip_kotype() */ unsigned int ref_count; /* reference count */ memory_object_control_t control_handle; /* mem object control handle */ @@ -203,6 +213,11 @@ macx_triggers( return EINVAL; } + if (default_pager_init_flag == 0) { + start_def_pager(NULL); + default_pager_init_flag = 1; + } + if (flags & SWAP_ENCRYPT_ON) { /* ENCRYPTED SWAP: tell default_pager to encrypt */ default_pager_triggers(default_pager, @@ -424,6 +439,9 @@ vnode_pager_bootstrap(void) size = 
(vm_size_t) sizeof(struct vnode_pager); vnode_pager_zone = zinit(size, (vm_size_t) MAX_VNODE*size, PAGE_SIZE, "vnode pager structures"); +#ifdef __i386__ + apple_protect_pager_bootstrap(); +#endif /* __i386__ */ return; } @@ -537,12 +555,53 @@ vnode_pager_get_object_size( { vnode_pager_t vnode_object; + if (mem_obj->mo_pager_ops != &vnode_pager_ops) { + *length = 0; + return KERN_INVALID_ARGUMENT; + } + vnode_object = vnode_pager_lookup(mem_obj); *length = vnode_pager_get_filesize(vnode_object->vnode_handle); return KERN_SUCCESS; } +kern_return_t +vnode_pager_get_object_pathname( + memory_object_t mem_obj, + char *pathname, + vm_size_t *length_p) +{ + vnode_pager_t vnode_object; + + if (mem_obj->mo_pager_ops != &vnode_pager_ops) { + return KERN_INVALID_ARGUMENT; + } + + vnode_object = vnode_pager_lookup(mem_obj); + + return vnode_pager_get_pathname(vnode_object->vnode_handle, + pathname, + length_p); +} + +kern_return_t +vnode_pager_get_object_filename( + memory_object_t mem_obj, + char **filename) +{ + vnode_pager_t vnode_object; + + if (mem_obj->mo_pager_ops != &vnode_pager_ops) { + return KERN_INVALID_ARGUMENT; + } + + vnode_object = vnode_pager_lookup(mem_obj); + + return vnode_pager_get_filename(vnode_object->vnode_handle, + filename); +} + /* * */ @@ -807,7 +866,7 @@ vnode_pager_cluster_read( if (kret == 1) { int uplflags; upl_t upl = NULL; - int count = 0; + unsigned int count = 0; kern_return_t kr; uplflags = (UPL_NO_SYNC | @@ -846,7 +905,7 @@ vnode_pager_release_from_cache( int *cnt) { memory_object_free_from_cache( - &realhost, &vnode_pager_workaround, cnt); + &realhost, &vnode_pager_ops, cnt); } /* @@ -869,7 +928,7 @@ vnode_object_create( * we reserve the second word in the object for a fake ip_kotype * setting - that will tell vm_map to use it as a memory object. 
*/ - vnode_object->pager = &vnode_pager_workaround; + vnode_object->pager_ops = &vnode_pager_ops; vnode_object->pager_ikot = IKOT_MEMORY_OBJECT; vnode_object->ref_count = 1; vnode_object->control_handle = MEMORY_OBJECT_CONTROL_NULL; @@ -888,7 +947,193 @@ vnode_pager_lookup( vnode_pager_t vnode_object; vnode_object = (vnode_pager_t)name; - assert(vnode_object->pager == &vnode_pager_workaround); + assert(vnode_object->pager_ops == &vnode_pager_ops); return (vnode_object); } + +/*********************** proc_info implementation *************/ + +#include + +static int fill_vnodeinfoforaddr( vm_map_entry_t entry, uint32_t * vnodeaddr, uint32_t * vid); + + +int +fill_procregioninfo(task_t task, uint64_t arg, struct proc_regioninfo_internal *pinfo, uint32_t *vnodeaddr, uint32_t *vid) +{ + + vm_map_t map = task->map; + vm_map_offset_t address = (vm_map_offset_t )arg; + vm_map_entry_t tmp_entry; + vm_map_entry_t entry; + vm_map_offset_t start; + vm_region_extended_info_data_t extended; + vm_region_top_info_data_t top; + + + if (map == VM_MAP_NULL) + return(0); + + vm_map_lock_read(map); + + start = address; + if (!vm_map_lookup_entry(map, start, &tmp_entry)) { + if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) { + vm_map_unlock_read(map); + return(0); + } + } else { + entry = tmp_entry; + } + + start = entry->vme_start; + + pinfo->pri_offset = entry->offset; + pinfo->pri_protection = entry->protection; + pinfo->pri_max_protection = entry->max_protection; + pinfo->pri_inheritance = entry->inheritance; + pinfo->pri_behavior = entry->behavior; + pinfo->pri_user_wired_count = entry->user_wired_count; + pinfo->pri_user_tag = entry->alias; + + if (entry->is_sub_map) { + pinfo->pri_flags |= PROC_REGION_SUBMAP; + } else { + if (entry->is_shared) + pinfo->pri_flags |= PROC_REGION_SHARED; + } + + + extended.protection = entry->protection; + extended.user_tag = entry->alias; + extended.pages_resident = 0; + extended.pages_swapped_out = 0; + extended.pages_shared_now_private 
= 0; + extended.pages_dirtied = 0; + extended.external_pager = 0; + extended.shadow_depth = 0; + + vm_map_region_walk(map, start, entry, entry->offset, entry->vme_end - start, &extended); + + if (extended.external_pager && extended.ref_count == 2 && extended.share_mode == SM_SHARED) + extended.share_mode = SM_PRIVATE; + + top.private_pages_resident = 0; + top.shared_pages_resident = 0; + vm_map_region_top_walk(entry, &top); + + + pinfo->pri_pages_resident = extended.pages_resident; + pinfo->pri_pages_shared_now_private = extended.pages_shared_now_private; + pinfo->pri_pages_swapped_out = extended.pages_swapped_out; + pinfo->pri_pages_dirtied = extended.pages_dirtied; + pinfo->pri_ref_count = extended.ref_count; + pinfo->pri_shadow_depth = extended.shadow_depth; + pinfo->pri_share_mode = extended.share_mode; + + pinfo->pri_private_pages_resident = top.private_pages_resident; + pinfo->pri_shared_pages_resident = top.shared_pages_resident; + pinfo->pri_obj_id = top.obj_id; + + pinfo->pri_address = (uint64_t)start; + pinfo->pri_size = (uint64_t)(entry->vme_end - start); + pinfo->pri_depth = 0; + + if ((vnodeaddr != 0) && (entry->is_sub_map == 0)) { + *vnodeaddr = (uint32_t)0; + + if (fill_vnodeinfoforaddr(entry, vnodeaddr, vid) ==0) { + vm_map_unlock_read(map); + return(1); + } + } + + vm_map_unlock_read(map); + return(1); +} + +static int +fill_vnodeinfoforaddr( + vm_map_entry_t entry, + uint32_t * vnodeaddr, + uint32_t * vid) +{ + vm_object_t top_object, object; + memory_object_t memory_object; + memory_object_pager_ops_t pager_ops; + kern_return_t kr; + int shadow_depth; + + + if (entry->is_sub_map) { + return(0); + } else { + /* + * The last object in the shadow chain has the + * relevant pager information. 
+ */ + top_object = entry->object.vm_object; + if (top_object == VM_OBJECT_NULL) { + object = VM_OBJECT_NULL; + shadow_depth = 0; + } else { + vm_object_lock(top_object); + for (object = top_object, shadow_depth = 0; + object->shadow != VM_OBJECT_NULL; + object = object->shadow, shadow_depth++) { + vm_object_lock(object->shadow); + vm_object_unlock(object); + } + } + } + + if (object == VM_OBJECT_NULL) { + return(0); + } else if (object->internal) { + vm_object_unlock(object); + return(0); + } else if (! object->pager_ready || + object->terminating || + ! object->alive) { + vm_object_unlock(object); + return(0); + } else { + memory_object = object->pager; + pager_ops = memory_object->mo_pager_ops; + if (pager_ops == &vnode_pager_ops) { + kr = vnode_pager_get_object_vnode( + memory_object, + vnodeaddr, vid); + if (kr != KERN_SUCCESS) { + vm_object_unlock(object); + return(0); + } + } else { + vm_object_unlock(object); + return(0); + } + } + vm_object_unlock(object); + return(1); +} + +kern_return_t +vnode_pager_get_object_vnode ( + memory_object_t mem_obj, + uint32_t * vnodeaddr, + uint32_t * vid) +{ + vnode_pager_t vnode_object; + + vnode_object = vnode_pager_lookup(mem_obj); + if (vnode_object->vnode_handle) { + *vnodeaddr = (uint32_t)vnode_object->vnode_handle; + *vid = (uint32_t)vnode_vid((void *)vnode_object->vnode_handle); + + return(KERN_SUCCESS); + } + + return(KERN_FAILURE); +} + diff --git a/osfmk/vm/device_vm.c b/osfmk/vm/device_vm.c index 464220034..807d10b2d 100644 --- a/osfmk/vm/device_vm.c +++ b/osfmk/vm/device_vm.c @@ -57,12 +57,27 @@ /* until component support available */ -int device_pager_workaround; +const struct memory_object_pager_ops device_pager_ops = { + device_pager_reference, + device_pager_deallocate, + device_pager_init, + device_pager_terminate, + device_pager_data_request, + device_pager_data_return, + device_pager_data_initialize, + device_pager_data_unlock, + device_pager_synchronize, + device_pager_unmap, + "device pager" +}; 
typedef int device_port_t; +/* + * The start of "struct device_pager" MUST match a "struct memory_object". + */ typedef struct device_pager { - int *pager; /* pager workaround pointer */ + memory_object_pager_ops_t pager_ops; /* == &device_pager_ops */ unsigned int pager_ikot; /* fake ip_kotype() */ unsigned int ref_count; /* reference count */ memory_object_control_t control_handle; /* mem object's cntrl handle */ @@ -161,7 +176,7 @@ device_pager_populate_object( return kr; if(!vm_object->phys_contiguous) { - int null_size = 0; + unsigned int null_size = 0; kr = vm_object_upl_request(vm_object, (vm_object_offset_t)offset, size, &upl, NULL, &null_size, (UPL_NO_SYNC | UPL_CLEAN_IN_PLACE)); @@ -187,7 +202,7 @@ device_pager_lookup( device_pager_t device_object; device_object = (device_pager_t)name; - assert(device_object->pager == &device_pager_workaround); + assert(device_object->pager_ops == &device_pager_ops); return (device_object); } @@ -254,9 +269,11 @@ device_pager_init( /*ARGSUSED6*/ kern_return_t device_pager_data_return( - memory_object_t mem_obj, - memory_object_offset_t offset, - vm_size_t data_cnt, + memory_object_t mem_obj, + memory_object_offset_t offset, + vm_size_t data_cnt, + __unused memory_object_offset_t *resid_offset, + __unused int *io_error, __unused boolean_t dirty, __unused boolean_t kernel_copy, __unused int upl_flags) @@ -336,7 +353,7 @@ device_pager_deallocate( * We still have to release the "memory object control" * handle. 
*/ - assert(device_control->object == VM_OBJECT_NULL); + assert(device_control->moc_object == VM_OBJECT_NULL); memory_object_control_deallocate(device_control); device_object->control_handle = MEMORY_OBJECT_CONTROL_NULL; @@ -419,7 +436,7 @@ device_object_create() device_object = (struct device_pager *) zalloc(device_pager_zone); if (device_object == DEVICE_PAGER_NULL) return(DEVICE_PAGER_NULL); - device_object->pager = &device_pager_workaround; + device_object->pager_ops = &device_pager_ops; device_object->pager_ikot = IKOT_MEMORY_OBJECT; device_object->ref_count = 1; device_object->control_handle = MEMORY_OBJECT_CONTROL_NULL; diff --git a/osfmk/vm/memory_object.c b/osfmk/vm/memory_object.c index 286fcf691..f990e664d 100644 --- a/osfmk/vm/memory_object.c +++ b/osfmk/vm/memory_object.c @@ -1541,15 +1541,17 @@ memory_object_iopl_request( vm_object_reference(object); named_entry_unlock(named_entry); } - } else { + } else if (ip_kotype(port) == IKOT_MEM_OBJ_CONTROL) { memory_object_control_t control; - control = (memory_object_control_t)port->ip_kobject; + control = (memory_object_control_t) port; if (control == NULL) return (KERN_INVALID_ARGUMENT); object = memory_object_control_to_vm_object(control); if (object == VM_OBJECT_NULL) return (KERN_INVALID_ARGUMENT); vm_object_reference(object); + } else { + return KERN_INVALID_ARGUMENT; } if (object == VM_OBJECT_NULL) return (KERN_INVALID_ARGUMENT); @@ -1811,152 +1813,12 @@ memory_object_page_op( int *flags) { vm_object_t object; - vm_page_t dst_page; - object = memory_object_control_to_vm_object(control); if (object == VM_OBJECT_NULL) return (KERN_INVALID_ARGUMENT); - vm_object_lock(object); - - if(ops & UPL_POP_PHYSICAL) { - if(object->phys_contiguous) { - if (phys_entry) { - *phys_entry = (ppnum_t) - (object->shadow_offset >> 12); - } - vm_object_unlock(object); - return KERN_SUCCESS; - } else { - vm_object_unlock(object); - return KERN_INVALID_OBJECT; - } - } - if(object->phys_contiguous) { - vm_object_unlock(object); 
- return KERN_INVALID_OBJECT; - } - - while(TRUE) { - if((dst_page = vm_page_lookup(object,offset)) == VM_PAGE_NULL) { - vm_object_unlock(object); - return KERN_FAILURE; - } - - /* Sync up on getting the busy bit */ - if((dst_page->busy || dst_page->cleaning) && - (((ops & UPL_POP_SET) && - (ops & UPL_POP_BUSY)) || (ops & UPL_POP_DUMP))) { - /* someone else is playing with the page, we will */ - /* have to wait */ - PAGE_SLEEP(object, dst_page, THREAD_UNINT); - continue; - } - - if (ops & UPL_POP_DUMP) { - vm_page_lock_queues(); - - if (dst_page->no_isync == FALSE) - pmap_disconnect(dst_page->phys_page); - vm_page_free(dst_page); - - vm_page_unlock_queues(); - break; - } - - if (flags) { - *flags = 0; - - /* Get the condition of flags before requested ops */ - /* are undertaken */ - - if(dst_page->dirty) *flags |= UPL_POP_DIRTY; - if(dst_page->pageout) *flags |= UPL_POP_PAGEOUT; - if(dst_page->precious) *flags |= UPL_POP_PRECIOUS; - if(dst_page->absent) *flags |= UPL_POP_ABSENT; - if(dst_page->busy) *flags |= UPL_POP_BUSY; - } - - /* The caller should have made a call either contingent with */ - /* or prior to this call to set UPL_POP_BUSY */ - if(ops & UPL_POP_SET) { - /* The protection granted with this assert will */ - /* not be complete. If the caller violates the */ - /* convention and attempts to change page state */ - /* without first setting busy we may not see it */ - /* because the page may already be busy. However */ - /* if such violations occur we will assert sooner */ - /* or later. 
*/ - assert(dst_page->busy || (ops & UPL_POP_BUSY)); - if (ops & UPL_POP_DIRTY) dst_page->dirty = TRUE; - if (ops & UPL_POP_PAGEOUT) dst_page->pageout = TRUE; - if (ops & UPL_POP_PRECIOUS) dst_page->precious = TRUE; - if (ops & UPL_POP_ABSENT) dst_page->absent = TRUE; - if (ops & UPL_POP_BUSY) dst_page->busy = TRUE; - } - - if(ops & UPL_POP_CLR) { - assert(dst_page->busy); - if (ops & UPL_POP_DIRTY) dst_page->dirty = FALSE; - if (ops & UPL_POP_PAGEOUT) dst_page->pageout = FALSE; - if (ops & UPL_POP_PRECIOUS) dst_page->precious = FALSE; - if (ops & UPL_POP_ABSENT) dst_page->absent = FALSE; - if (ops & UPL_POP_BUSY) { - dst_page->busy = FALSE; - PAGE_WAKEUP(dst_page); - } - } - - if (dst_page->encrypted) { - /* - * ENCRYPTED SWAP: - * We need to decrypt this encrypted page before the - * caller can access its contents. - * But if the caller really wants to access the page's - * contents, they have to keep the page "busy". - * Otherwise, the page could get recycled or re-encrypted - * at any time. - */ - if ((ops & UPL_POP_SET) && (ops & UPL_POP_BUSY) && - dst_page->busy) { - /* - * The page is stable enough to be accessed by - * the caller, so make sure its contents are - * not encrypted. - */ - vm_page_decrypt(dst_page, 0); - } else { - /* - * The page is not busy, so don't bother - * decrypting it, since anything could - * happen to it between now and when the - * caller wants to access it. - * We should not give the caller access - * to this page. - */ - assert(!phys_entry); - } - } - - if (phys_entry) { - /* - * The physical page number will remain valid - * only if the page is kept busy. - * ENCRYPTED SWAP: make sure we don't let the - * caller access an encrypted page. 
- */ - assert(dst_page->busy); - assert(!dst_page->encrypted); - *phys_entry = dst_page->phys_page; - } - - break; - } - - vm_object_unlock(object); - return KERN_SUCCESS; - + return vm_object_page_op(object, offset, ops, phys_entry, flags); } /* @@ -1977,71 +1839,17 @@ memory_object_range_op( int ops, int *range) { - memory_object_offset_t offset; vm_object_t object; - vm_page_t dst_page; object = memory_object_control_to_vm_object(control); if (object == VM_OBJECT_NULL) return (KERN_INVALID_ARGUMENT); - if (object->resident_page_count == 0) { - if (range) { - if (ops & UPL_ROP_PRESENT) - *range = 0; - else - *range = offset_end - offset_beg; - } - return KERN_SUCCESS; - } - vm_object_lock(object); - - if (object->phys_contiguous) { - vm_object_unlock(object); - return KERN_INVALID_OBJECT; - } - - offset = offset_beg; - - while (offset < offset_end) { - dst_page = vm_page_lookup(object, offset); - if (dst_page != VM_PAGE_NULL) { - if (ops & UPL_ROP_DUMP) { - if (dst_page->busy || dst_page->cleaning) { - /* - * someone else is playing with the - * page, we will have to wait - */ - PAGE_SLEEP(object, - dst_page, THREAD_UNINT); - /* - * need to relook the page up since it's - * state may have changed while we slept - * it might even belong to a different object - * at this point - */ - continue; - } - vm_page_lock_queues(); - - if (dst_page->no_isync == FALSE) - pmap_disconnect(dst_page->phys_page); - vm_page_free(dst_page); - - vm_page_unlock_queues(); - } else if (ops & UPL_ROP_ABSENT) - break; - } else if (ops & UPL_ROP_PRESENT) - break; - - offset += PAGE_SIZE; - } - vm_object_unlock(object); - - if (range) - *range = offset - offset_beg; - - return KERN_SUCCESS; + return vm_object_range_op(object, + offset_beg, + offset_end, + ops, + range); } @@ -2084,8 +1892,10 @@ memory_object_control_allocate( memory_object_control_t control; control = (memory_object_control_t)zalloc(mem_obj_control_zone); - if (control != MEMORY_OBJECT_CONTROL_NULL) - control->object = 
object; + if (control != MEMORY_OBJECT_CONTROL_NULL) { + control->moc_object = object; + control->moc_ikot = IKOT_MEM_OBJ_CONTROL; /* fake ip_kotype */ + } return (control); } @@ -2094,19 +1904,20 @@ memory_object_control_collapse( memory_object_control_t control, vm_object_t object) { - assert((control->object != VM_OBJECT_NULL) && - (control->object != object)); - control->object = object; + assert((control->moc_object != VM_OBJECT_NULL) && + (control->moc_object != object)); + control->moc_object = object; } __private_extern__ vm_object_t memory_object_control_to_vm_object( memory_object_control_t control) { - if (control == MEMORY_OBJECT_CONTROL_NULL) + if (control == MEMORY_OBJECT_CONTROL_NULL || + control->moc_ikot != IKOT_MEM_OBJ_CONTROL) return VM_OBJECT_NULL; - return (control->object); + return (control->moc_object); } memory_object_control_t @@ -2147,8 +1958,8 @@ void memory_object_control_disable( memory_object_control_t control) { - assert(control->object != VM_OBJECT_NULL); - control->object = VM_OBJECT_NULL; + assert(control->moc_object != VM_OBJECT_NULL); + control->moc_object = VM_OBJECT_NULL; } void @@ -2185,30 +1996,16 @@ convert_memory_object_to_port( void memory_object_reference( memory_object_t memory_object) { - -#ifdef MACH_BSD - if (memory_object->pager == &vnode_pager_workaround) { - vnode_pager_reference(memory_object); - } else if (memory_object->pager == &device_pager_workaround) { - device_pager_reference(memory_object); - } else -#endif - dp_memory_object_reference(memory_object); + (memory_object->mo_pager_ops->memory_object_reference)( + memory_object); } /* Routine memory_object_deallocate */ void memory_object_deallocate( memory_object_t memory_object) { - -#ifdef MACH_BSD - if (memory_object->pager == &vnode_pager_workaround) { - vnode_pager_deallocate(memory_object); - } else if (memory_object->pager == &device_pager_workaround) { - device_pager_deallocate(memory_object); - } else -#endif - 
dp_memory_object_deallocate(memory_object); + (memory_object->mo_pager_ops->memory_object_deallocate)( + memory_object); } @@ -2220,20 +2017,10 @@ kern_return_t memory_object_init memory_object_cluster_size_t memory_object_page_size ) { -#ifdef MACH_BSD - if (memory_object->pager == &vnode_pager_workaround) { - return vnode_pager_init(memory_object, - memory_control, - memory_object_page_size); - } else if (memory_object->pager == &device_pager_workaround) { - return device_pager_init(memory_object, - memory_control, - memory_object_page_size); - } else -#endif - return dp_memory_object_init(memory_object, - memory_control, - memory_object_page_size); + return (memory_object->mo_pager_ops->memory_object_init)( + memory_object, + memory_control, + memory_object_page_size); } /* Routine memory_object_terminate */ @@ -2242,14 +2029,8 @@ kern_return_t memory_object_terminate memory_object_t memory_object ) { -#ifdef MACH_BSD - if (memory_object->pager == &vnode_pager_workaround) { - return vnode_pager_terminate(memory_object); - } else if (memory_object->pager == &device_pager_workaround) { - return device_pager_terminate(memory_object); - } else -#endif - return dp_memory_object_terminate(memory_object); + return (memory_object->mo_pager_ops->memory_object_terminate)( + memory_object); } /* Routine memory_object_data_request */ @@ -2261,23 +2042,11 @@ kern_return_t memory_object_data_request vm_prot_t desired_access ) { -#ifdef MACH_BSD - if (memory_object->pager == &vnode_pager_workaround) { - return vnode_pager_data_request(memory_object, - offset, - length, - desired_access); - } else if (memory_object->pager == &device_pager_workaround) { - return device_pager_data_request(memory_object, - offset, - length, - desired_access); - } else -#endif - return dp_memory_object_data_request(memory_object, - offset, - length, - desired_access); + return (memory_object->mo_pager_ops->memory_object_data_request)( + memory_object, + offset, + length, + desired_access); } /* 
Routine memory_object_data_return */ @@ -2293,37 +2062,15 @@ kern_return_t memory_object_data_return int upl_flags ) { -#ifdef MACH_BSD - if (memory_object->pager == &vnode_pager_workaround) { - return vnode_pager_data_return(memory_object, - offset, - size, - resid_offset, - io_error, - dirty, - kernel_copy, - upl_flags); - } else if (memory_object->pager == &device_pager_workaround) { - - return device_pager_data_return(memory_object, - offset, - size, - dirty, - kernel_copy, - upl_flags); - } - else -#endif - { - return dp_memory_object_data_return(memory_object, - offset, - size, - NULL, - NULL, - dirty, - kernel_copy, - upl_flags); - } + return (memory_object->mo_pager_ops->memory_object_data_return)( + memory_object, + offset, + size, + resid_offset, + io_error, + dirty, + kernel_copy, + upl_flags); } /* Routine memory_object_data_initialize */ @@ -2334,20 +2081,10 @@ kern_return_t memory_object_data_initialize vm_size_t size ) { -#ifdef MACH_BSD - if (memory_object->pager == &vnode_pager_workaround) { - return vnode_pager_data_initialize(memory_object, - offset, - size); - } else if (memory_object->pager == &device_pager_workaround) { - return device_pager_data_initialize(memory_object, - offset, - size); - } else -#endif - return dp_memory_object_data_initialize(memory_object, - offset, - size); + return (memory_object->mo_pager_ops->memory_object_data_initialize)( + memory_object, + offset, + size); } /* Routine memory_object_data_unlock */ @@ -2359,23 +2096,11 @@ kern_return_t memory_object_data_unlock vm_prot_t desired_access ) { -#ifdef MACH_BSD - if (memory_object->pager == &vnode_pager_workaround) { - return vnode_pager_data_unlock(memory_object, - offset, - size, - desired_access); - } else if (memory_object->pager == &device_pager_workaround) { - return device_pager_data_unlock(memory_object, - offset, - size, - desired_access); - } else -#endif - return dp_memory_object_data_unlock(memory_object, - offset, - size, - desired_access); + return 
(memory_object->mo_pager_ops->memory_object_data_unlock)( + memory_object, + offset, + size, + desired_access); } /* Routine memory_object_synchronize */ @@ -2387,23 +2112,11 @@ kern_return_t memory_object_synchronize vm_sync_t sync_flags ) { -#ifdef MACH_BSD - if (memory_object->pager == &vnode_pager_workaround) { - return vnode_pager_synchronize(memory_object, - offset, - size, - sync_flags); - } else if (memory_object->pager == &device_pager_workaround) { - return device_pager_synchronize(memory_object, - offset, - size, - sync_flags); - } else -#endif - return dp_memory_object_synchronize(memory_object, - offset, - size, - sync_flags); + return (memory_object->mo_pager_ops->memory_object_synchronize)( + memory_object, + offset, + size, + sync_flags); } /* Routine memory_object_unmap */ @@ -2412,14 +2125,8 @@ kern_return_t memory_object_unmap memory_object_t memory_object ) { -#ifdef MACH_BSD - if (memory_object->pager == &vnode_pager_workaround) { - return vnode_pager_unmap(memory_object); - } else if (memory_object->pager == &device_pager_workaround) { - return device_pager_unmap(memory_object); - } else -#endif - return dp_memory_object_unmap(memory_object); + return (memory_object->mo_pager_ops->memory_object_unmap)( + memory_object); } /* Routine memory_object_create */ diff --git a/osfmk/vm/memory_object.h b/osfmk/vm/memory_object.h index 1bb55a3a8..8b915a089 100644 --- a/osfmk/vm/memory_object.h +++ b/osfmk/vm/memory_object.h @@ -112,9 +112,9 @@ extern ipc_port_t convert_upl_to_port( upl_t ); __private_extern__ void upl_no_senders(ipc_port_t, mach_port_mscount_t); extern kern_return_t memory_object_free_from_cache( - host_t host, - int *pager_id, - int *count); + host_t host, + memory_object_pager_ops_t pager_ops, + int *count); extern kern_return_t memory_object_iopl_request( ipc_port_t port, diff --git a/osfmk/vm/pmap.h b/osfmk/vm/pmap.h index bc99746c9..7638a13a3 100644 --- a/osfmk/vm/pmap.h +++ b/osfmk/vm/pmap.h @@ -178,7 +178,9 @@ extern void 
pmap_virtual_space( /* * Routines to manage the physical map data structure. */ -extern pmap_t pmap_create(vm_map_size_t size); /* Create a pmap_t. */ +extern pmap_t pmap_create( /* Create a pmap_t. */ + vm_map_size_t size, + boolean_t is_64bit); extern pmap_t (pmap_kernel)(void); /* Return the kernel's pmap */ extern void pmap_reference(pmap_t pmap); /* Gain a reference. */ extern void pmap_destroy(pmap_t pmap); /* Release a reference. */ @@ -248,6 +250,9 @@ extern kern_return_t (pmap_attribute_cache_sync)( /* Flush appropriate vm_machine_attribute_t attribute, vm_machine_attribute_val_t* value); +extern unsigned int (pmap_cache_attributes)( + ppnum_t pn); + /* * debug/assertions. pmap_verify_free returns true iff * the given physical page is mapped into no pmap. diff --git a/osfmk/vm/task_working_set.c b/osfmk/vm/task_working_set.c index 6292cabe5..5a9fe42eb 100644 --- a/osfmk/vm/task_working_set.c +++ b/osfmk/vm/task_working_set.c @@ -1071,9 +1071,13 @@ tws_build_cluster( return; if (!object->internal) { + /* XXX FBDP !internal doesn't mean vnode pager */ kret = vnode_pager_get_object_size( object->pager, &object_size); + if (kret != KERN_SUCCESS) { + object_size = object->size; + } } else { object_size = object->size; } diff --git a/osfmk/vm/vm_apple_protect.c b/osfmk/vm/vm_apple_protect.c new file mode 100644 index 000000000..7e9797bf0 --- /dev/null +++ b/osfmk/vm/vm_apple_protect.c @@ -0,0 +1,1023 @@ +/* + * Copyright (c) 2006 Apple Computer, Inc. All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * The contents of this file constitute Original Code as defined in and + * are subject to the Apple Public Source License Version 1.1 (the + * "License"). You may not use this file except in compliance with the + * License. Please obtain a copy of the License at + * http://www.apple.com/publicsource and read it before using this file. 
+ * + * This Original Code and all software distributed under the License are + * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the + * License for the specific language governing rights and limitations + * under the License. + * + * @APPLE_LICENSE_HEADER_END@ + */ + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include +#include + +#include +#include + +#include +#include +#include +#include +#include + + +/* + * APPLE PROTECT MEMORY PAGER + * + * This external memory manager (EMM) handles memory from the encrypted + * sections of some executables protected by the DSMOS kernel extension. + * + * It mostly handles page-in requests (from memory_object_data_request()) by + * getting the encrypted data from its backing VM object, itself backed by + * the encrypted file, decrypting it and providing it to VM. + * + * The decrypted pages will never be dirtied, so the memory manager doesn't + * need to handle page-out requests (from memory_object_data_return()). The + * pages need to be mapped copy-on-write, so that the originals stay clean. + * + * We don't expect to have to handle a large number of apple-protected + * binaries, so the data structures are very simple (simple linked list) + * for now. 
+ */ + +/* forward declarations */ +void apple_protect_pager_reference(memory_object_t mem_obj); +void apple_protect_pager_deallocate(memory_object_t mem_obj); +kern_return_t apple_protect_pager_init(memory_object_t mem_obj, + memory_object_control_t control, + vm_size_t pg_size); +kern_return_t apple_protect_pager_terminate(memory_object_t mem_obj); +kern_return_t apple_protect_pager_data_request(memory_object_t mem_obj, + memory_object_offset_t offset, + vm_size_t length, + vm_prot_t protection_required); +kern_return_t apple_protect_pager_data_return(memory_object_t mem_obj, + memory_object_offset_t offset, + vm_size_t data_cnt, + memory_object_offset_t *resid_offset, + int *io_error, + boolean_t dirty, + boolean_t kernel_copy, + int upl_flags); +kern_return_t apple_protect_pager_data_initialize(memory_object_t mem_obj, + memory_object_offset_t offset, + vm_size_t data_cnt); +kern_return_t apple_protect_pager_data_unlock(memory_object_t mem_obj, + memory_object_offset_t offset, + vm_size_t size, + vm_prot_t desired_access); +kern_return_t apple_protect_pager_synchronize(memory_object_t mem_obj, + memory_object_offset_t offset, + vm_size_t length, + vm_sync_t sync_flags); +kern_return_t apple_protect_pager_unmap(memory_object_t mem_obj); + +/* + * Vector of VM operations for this EMM. + * These routines are invoked by VM via the memory_object_*() interfaces. + */ +const struct memory_object_pager_ops apple_protect_pager_ops = { + apple_protect_pager_reference, + apple_protect_pager_deallocate, + apple_protect_pager_init, + apple_protect_pager_terminate, + apple_protect_pager_data_request, + apple_protect_pager_data_return, + apple_protect_pager_data_initialize, + apple_protect_pager_data_unlock, + apple_protect_pager_synchronize, + apple_protect_pager_unmap, + "apple protect pager" +}; + +/* + * The "apple_protect_pager" describes a memory object backed by + * the "apple protect" EMM. 
+ */ +typedef struct apple_protect_pager { + memory_object_pager_ops_t pager_ops; /* == &apple_protect_pager_ops */ + unsigned int pager_ikot; /* JMM: fake ip_kotype() */ + queue_chain_t pager_queue; /* next & prev pagers */ + unsigned int ref_count; /* reference count */ + boolean_t is_ready; /* is this pager ready ? */ + boolean_t is_mapped; /* is this mem_obj mapped ? */ + memory_object_control_t pager_control; /* mem object control handle */ + vm_object_t backing_object; /* VM obj w/ encrypted data */ +} *apple_protect_pager_t; +#define APPLE_PROTECT_PAGER_NULL ((apple_protect_pager_t) NULL) + +/* + * List of memory objects managed by this EMM. + * The list is protected by the "apple_protect_pager_lock" lock. + */ +int apple_protect_pager_count = 0; /* number of pagers */ +int apple_protect_pager_count_mapped = 0; /* number of mapped pagers */ +queue_head_t apple_protect_pager_queue; +decl_mutex_data(,apple_protect_pager_lock) + +/* + * Maximum number of unmapped pagers we're willing to keep around. + */ +int apple_protect_pager_cache_limit = 10; + +/* + * Statistics & counters.
+ */ +int apple_protect_pager_count_max = 0; +int apple_protect_pager_count_unmapped_max = 0; +int apple_protect_pager_num_trim_max = 0; +int apple_protect_pager_num_trim_total = 0; + +/* internal prototypes */ +apple_protect_pager_t apple_protect_pager_create(vm_object_t backing_object); +apple_protect_pager_t apple_protect_pager_lookup(memory_object_t mem_obj); +void apple_protect_pager_dequeue(apple_protect_pager_t pager); +void apple_protect_pager_deallocate_internal(apple_protect_pager_t pager, + boolean_t locked); +void apple_protect_pager_terminate_internal(apple_protect_pager_t pager); +void apple_protect_pager_trim(void); + + +#if DEBUG +int apple_protect_pagerdebug = 0; +#define PAGER_ALL 0xffffffff +#define PAGER_INIT 0x00000001 +#define PAGER_PAGEIN 0x00000002 + +#define PAGER_DEBUG(LEVEL, A) \ + MACRO_BEGIN \ + if ((apple_protect_pagerdebug & LEVEL)==LEVEL) { \ + printf A; \ + } \ + MACRO_END +#else +#define PAGER_DEBUG(LEVEL, A) +#endif + + +void +apple_protect_pager_bootstrap(void) +{ + mutex_init(&apple_protect_pager_lock, 0); + queue_init(&apple_protect_pager_queue); +} + +/* + * apple_protect_pager_init() + * + * Initialize the memory object and makes it ready to be used and mapped. 
+ */ +kern_return_t +apple_protect_pager_init( + memory_object_t mem_obj, + memory_object_control_t control, +#if !DEBUG + __unused +#endif + vm_size_t pg_size) +{ + apple_protect_pager_t pager; + kern_return_t kr; + memory_object_attr_info_data_t attributes; + + PAGER_DEBUG(PAGER_ALL, + ("apple_protect_pager_init: %p, %p, %x\n", + mem_obj, control, pg_size)); + + if (control == MEMORY_OBJECT_CONTROL_NULL) + return KERN_INVALID_ARGUMENT; + + pager = apple_protect_pager_lookup(mem_obj); + + memory_object_control_reference(control); + + pager->pager_control = control; + + attributes.copy_strategy = MEMORY_OBJECT_COPY_DELAY; + /* attributes.cluster_size = (1 << (CLUSTER_SHIFT + PAGE_SHIFT));*/ + attributes.cluster_size = (1 << (PAGE_SHIFT)); + attributes.may_cache_object = FALSE; + attributes.temporary = TRUE; + + kr = memory_object_change_attributes( + control, + MEMORY_OBJECT_ATTRIBUTE_INFO, + (memory_object_info_t) &attributes, + MEMORY_OBJECT_ATTR_INFO_COUNT); + if (kr != KERN_SUCCESS) + panic("apple_protect_pager_init: " + "memory_object_change_attributes() failed"); + + return KERN_SUCCESS; +} + +/* + * apple_protect_pager_data_return() + * + * Handles page-out requests from VM. This should never happen since + * the pages provided by this EMM are not supposed to be dirty or dirtied + * and VM should simply discard the contents and reclaim the pages if it + * needs to.
+ */ +kern_return_t +apple_protect_pager_data_return( + __unused memory_object_t mem_obj, + __unused memory_object_offset_t offset, + __unused vm_size_t data_cnt, + __unused memory_object_offset_t *resid_offset, + __unused int *io_error, + __unused boolean_t dirty, + __unused boolean_t kernel_copy, + __unused int upl_flags) +{ + panic("apple_protect_pager_data_return: should never get called"); + return KERN_FAILURE; +} + +kern_return_t +apple_protect_pager_data_initialize( + __unused memory_object_t mem_obj, + __unused memory_object_offset_t offset, + __unused vm_size_t data_cnt) +{ + panic("apple_protect_pager_data_initialize: should never get called"); + return KERN_FAILURE; +} + +kern_return_t +apple_protect_pager_data_unlock( + __unused memory_object_t mem_obj, + __unused memory_object_offset_t offset, + __unused vm_size_t size, + __unused vm_prot_t desired_access) +{ + return KERN_FAILURE; +} + +/* + * apple_protect_pager_data_request() + * + * Handles page-in requests from VM. + */ +kern_return_t +apple_protect_pager_data_request( + memory_object_t mem_obj, + memory_object_offset_t offset, + vm_size_t length, +#if !DEBUG + __unused +#endif + vm_prot_t protection_required) +{ + apple_protect_pager_t pager; + memory_object_control_t mo_control; + upl_t upl = NULL; + int upl_flags; + upl_size_t upl_size; + upl_page_info_t *upl_pl; + vm_object_t src_object, dst_object; + kern_return_t kr, retval; + vm_map_offset_t src_mapping = 0, dst_mapping = 0; + vm_offset_t src_vaddr, dst_vaddr; + vm_offset_t cur_offset; + boolean_t src_map_page_by_page; + vm_map_entry_t map_entry; + + PAGER_DEBUG(PAGER_ALL, ("apple_protect_pager_data_request: %x, %llx, %llxx, %x\n", mem_obj, offset, length, protection_required)); + + pager = apple_protect_pager_lookup(mem_obj); + assert(pager->is_ready); + assert(pager->ref_count > 1); /* pager is alive and mapped */ + + PAGER_DEBUG(PAGER_PAGEIN, ("apple_protect_pager_data_request: %x, %llx, %llx, %x, pager %x\n", mem_obj, offset, length, 
protection_required, pager)); + + /* + * Map the encrypted data in the kernel address space from the + * backing VM object (itself backed by the encrypted file via + * the vnode pager). + */ + src_object = pager->backing_object; + assert(src_object != VM_OBJECT_NULL); + vm_object_reference(src_object); /* ref. for the mapping */ + src_mapping = 0; + kr = vm_map_enter(kernel_map, + &src_mapping, + length, + 0, + VM_FLAGS_ANYWHERE, + src_object, + offset, + FALSE, + VM_PROT_READ, + VM_PROT_READ, + VM_INHERIT_NONE); + switch (kr) { + case KERN_SUCCESS: + src_map_page_by_page = FALSE; + src_vaddr = CAST_DOWN(vm_offset_t, src_mapping); + break; + case KERN_NO_SPACE: + /* we can't map the entire section, so map it page by page */ + src_map_page_by_page = TRUE; + vm_object_deallocate(src_object); + break; + default: + vm_object_deallocate(src_object); + retval = kr; + goto done; + } + + + /* + * Gather in a UPL all the VM pages requested by VM. + */ + mo_control = pager->pager_control; + + upl_size = length; + upl_flags = + UPL_RET_ONLY_ABSENT | + UPL_SET_LITE | + UPL_NO_SYNC | + UPL_CLEAN_IN_PLACE | /* triggers UPL_CLEAR_DIRTY */ + UPL_SET_INTERNAL; + kr = memory_object_upl_request(mo_control, + offset, upl_size, + &upl, NULL, NULL, upl_flags); + if (kr != KERN_SUCCESS) { + retval = kr; + goto done; + } + + /* + * Reserve a virtual page in the kernel address space to map each + * destination physical page when it's its turn to be filled. + */ + dst_object = mo_control->moc_object; + assert(dst_object != VM_OBJECT_NULL); + dst_mapping = 0; + vm_object_reference(kernel_object); /* ref. 
for mapping */ + kr = vm_map_find_space(kernel_map, + &dst_mapping, + PAGE_SIZE_64, + 0, + 0, + &map_entry); + if (kr != KERN_SUCCESS) { + vm_object_deallocate(kernel_object); + retval = kr; + goto done; + } + map_entry->object.vm_object = kernel_object; + map_entry->offset = dst_mapping - VM_MIN_KERNEL_ADDRESS; + vm_map_unlock(kernel_map); + dst_vaddr = CAST_DOWN(vm_offset_t, dst_mapping); + + /* + * Fill in the contents of the pages requested by VM. + */ + upl_pl = UPL_GET_INTERNAL_PAGE_LIST(upl); + for (cur_offset = 0; cur_offset < length; cur_offset += PAGE_SIZE) { + ppnum_t dst_pnum; + + /* + * Establish an explicit pmap mapping of the destination + * physical page. + * We can't do a regular VM mapping because the VM page + * is "busy". + */ + if (!upl_page_present(upl_pl, cur_offset / PAGE_SIZE)) { + /* this page is not in the UPL: skip it */ + continue; + } + dst_pnum = (addr64_t) + upl_phys_page(upl_pl, cur_offset / PAGE_SIZE); + assert (dst_pnum != 0); + pmap_enter(kernel_pmap, dst_mapping, dst_pnum, + VM_PROT_READ | VM_PROT_WRITE, + dst_object->wimg_bits & VM_WIMG_MASK, + FALSE); + + /* + * Map the source (encrypted) page in the kernel's + * virtual address space. + */ + if (src_map_page_by_page) { + vm_object_reference(src_object); /* ref. for mapping */ + kr = vm_map_enter(kernel_map, + &src_mapping, + PAGE_SIZE_64, + 0, + VM_FLAGS_ANYWHERE, + src_object, + offset + cur_offset, + FALSE, + VM_PROT_READ, + VM_PROT_READ, + VM_INHERIT_NONE); + if (kr != KERN_SUCCESS) { + vm_object_deallocate(src_object); + retval = kr; + goto done; + } + src_vaddr = CAST_DOWN(vm_offset_t, src_mapping); + } else { + src_vaddr = src_mapping + cur_offset; + } + + /* + * Decrypt the encrypted contents of the source page + * into the destination page. + */ + dsmos_page_transform((const void *) src_vaddr, + (void *) dst_vaddr); + + /* + * Remove the pmap mapping of the destination page + * in the kernel. 
+ */ + pmap_remove(kernel_pmap, + (addr64_t) dst_mapping, + (addr64_t) (dst_mapping + PAGE_SIZE_64)); + if (src_map_page_by_page) { + /* + * Remove the kernel mapping of the source page. + * This releases the extra reference we took on + * src_object. + */ + kr = vm_map_remove(kernel_map, + src_mapping, + src_mapping + PAGE_SIZE_64, + VM_MAP_NO_FLAGS); + assert(kr == KERN_SUCCESS); + src_mapping = 0; + } + } + + retval = KERN_SUCCESS; +done: + if (src_mapping != 0) { + /* clean up the mapping of the source pages */ + kr = vm_map_remove(kernel_map, + src_mapping, + src_mapping + length, + VM_MAP_NO_FLAGS); + assert (kr == KERN_SUCCESS); + src_mapping = 0; + src_vaddr = 0; + } + if (upl != NULL) { + /* clean up the UPL */ + + /* + * The pages are currently dirty because we've just been + * writing on them, but as far as we're concerned, they're + * clean since they contain their "original" contents as + * provided by us, the pager. + * Tell the UPL to mark them "clean". + */ + upl_clear_dirty(upl, TRUE); + + /* abort or commit the UPL */ + if (retval != KERN_SUCCESS) { + upl_abort(upl, 0); + } else { + upl_commit(upl, NULL, 0); + } + + /* and deallocate the UPL */ + upl_deallocate(upl); + upl = NULL; + } + if (dst_mapping != 0) { + /* clean up the mapping of the destination pages */ + kr = vm_map_remove(kernel_map, + dst_mapping, + dst_mapping + PAGE_SIZE_64, + VM_MAP_NO_FLAGS); + assert(kr == KERN_SUCCESS); + dst_mapping = 0; + dst_vaddr = 0; + } + + return retval; +} + +/* + * apple_protect_pager_reference() + * + * Get a reference on this memory object. + * For external usage only. Assumes that the initial reference count is not 0, + * i.e one should not "revive" a dead pager this way. 
+ */ +void +apple_protect_pager_reference( + memory_object_t mem_obj) +{ + apple_protect_pager_t pager; + + pager = apple_protect_pager_lookup(mem_obj); + + mutex_lock(&apple_protect_pager_lock); + assert(pager->ref_count > 0); + pager->ref_count++; + mutex_unlock(&apple_protect_pager_lock); +} + + +/* + * apple_protect_pager_dequeue: + * + * Removes a pager from the list of pagers. + * + * The caller must hold "apple_protect_pager_lock". + */ +void +apple_protect_pager_dequeue( + apple_protect_pager_t pager) +{ + assert(!pager->is_mapped); + + queue_remove(&apple_protect_pager_queue, + pager, + apple_protect_pager_t, + pager_queue); + pager->pager_queue.next = NULL; + pager->pager_queue.prev = NULL; + + apple_protect_pager_count--; +} + +/* + * apple_protect_pager_terminate_internal: + * + * Trigger the asynchronous termination of the memory object associated + * with this pager. + * When the memory object is terminated, there will be one more call + * to memory_object_deallocate() (i.e. apple_protect_pager_deallocate()) + * to finish the clean up. + * + * "apple_protect_pager_lock" should not be held by the caller. + * We don't need the lock because the pager has already been removed from + * the pagers' list and is now ours exclusively. + */ +void +apple_protect_pager_terminate_internal( + apple_protect_pager_t pager) +{ + assert(pager->is_ready); + assert(!pager->is_mapped); + + if (pager->backing_object != VM_OBJECT_NULL) { + vm_object_deallocate(pager->backing_object); + pager->backing_object = VM_OBJECT_NULL; + } + + /* trigger the destruction of the memory object */ + memory_object_destroy(pager->pager_control, 0); +} + +/* + * apple_protect_pager_deallocate_internal() + * + * Release a reference on this pager and free it when the last + * reference goes away. + * Can be called with apple_protect_pager_lock held or not but always returns + * with it unlocked. 
+ */ +void +apple_protect_pager_deallocate_internal( + apple_protect_pager_t pager, + boolean_t locked) +{ + boolean_t needs_trimming; + int count_unmapped; + + if (! locked) { + mutex_lock(&apple_protect_pager_lock); + } + + count_unmapped = (apple_protect_pager_count - + apple_protect_pager_count_mapped); + if (count_unmapped > apple_protect_pager_cache_limit) { + /* we have too many unmapped pagers: trim some */ + needs_trimming = TRUE; + } else { + needs_trimming = FALSE; + } + + /* drop a reference on this pager */ + pager->ref_count--; + + if (pager->ref_count == 1) { + /* + * Only the "named" reference is left, which means that + * no one is really holding on to this pager anymore. + * Terminate it. + */ + apple_protect_pager_dequeue(pager); + /* the pager is all ours: no need for the lock now */ + mutex_unlock(&apple_protect_pager_lock); + apple_protect_pager_terminate_internal(pager); + } else if (pager->ref_count == 0) { + /* + * Dropped the existence reference; the memory object has + * been terminated. Do some final cleanup and release the + * pager structure. + */ + mutex_unlock(&apple_protect_pager_lock); + if (pager->pager_control != MEMORY_OBJECT_CONTROL_NULL) { + memory_object_control_deallocate(pager->pager_control); + pager->pager_control = MEMORY_OBJECT_CONTROL_NULL; + } + kfree(pager, sizeof (*pager)); + pager = APPLE_PROTECT_PAGER_NULL; + } else { + /* there are still plenty of references: keep going... */ + mutex_unlock(&apple_protect_pager_lock); + } + + if (needs_trimming) { + apple_protect_pager_trim(); + } + /* caution: lock is not held on return... */ +} + +/* + * apple_protect_pager_deallocate() + * + * Release a reference on this pager and free it when the last + * reference goes away.
+ */ +void +apple_protect_pager_deallocate( + memory_object_t mem_obj) +{ + apple_protect_pager_t pager; + + PAGER_DEBUG(PAGER_ALL, ("apple_protect_pager_deallocate: %x\n", mem_obj)); + pager = apple_protect_pager_lookup(mem_obj); + apple_protect_pager_deallocate_internal(pager, FALSE); +} + +/* + * + */ +kern_return_t +apple_protect_pager_terminate( +#if !DEBUG + __unused +#endif + memory_object_t mem_obj) +{ + PAGER_DEBUG(PAGER_ALL, ("apple_protect_pager_terminate: %x\n", mem_obj)); + + return KERN_SUCCESS; +} + +/* + * + */ +kern_return_t +apple_protect_pager_synchronize( + memory_object_t mem_obj, + memory_object_offset_t offset, + vm_size_t length, + __unused vm_sync_t sync_flags) +{ + apple_protect_pager_t pager; + + PAGER_DEBUG(PAGER_ALL, ("apple_protect_pager_synchronize: %x\n", mem_obj)); + + pager = apple_protect_pager_lookup(mem_obj); + + memory_object_synchronize_completed(pager->pager_control, + offset, length); + + return KERN_SUCCESS; +} + +/* + * apple_protect_pager_map() + * + * This allows VM to let us, the EMM, know that this memory object + * is currently mapped one or more times. This is called by VM only the first + * time the memory object gets mapped and we take one extra reference on the + * memory object to account for all its mappings. + */ +void +apple_protect_pager_map( + memory_object_t mem_obj) +{ + apple_protect_pager_t pager; + + PAGER_DEBUG(PAGER_ALL, ("apple_protect_pager_map: %x\n", mem_obj)); + + pager = apple_protect_pager_lookup(mem_obj); + + mutex_lock(&apple_protect_pager_lock); + assert(pager->is_ready); + assert(pager->ref_count > 0); /* pager is alive */ + if (pager->is_mapped == FALSE) { + /* + * First mapping of this pager: take an extra reference + * that will remain until all the mappings of this pager + * are removed. 
+ */ + pager->is_mapped = TRUE; + pager->ref_count++; + apple_protect_pager_count_mapped++; + } + mutex_unlock(&apple_protect_pager_lock); +} + +/* + * apple_protect_pager_unmap() + * + * This is called by VM when this memory object is no longer mapped anywhere. + */ +kern_return_t +apple_protect_pager_unmap( + memory_object_t mem_obj) +{ + apple_protect_pager_t pager; + int count_unmapped; + + PAGER_DEBUG(PAGER_ALL, ("apple_protect_pager_unmap: %x\n", mem_obj)); + + pager = apple_protect_pager_lookup(mem_obj); + + mutex_lock(&apple_protect_pager_lock); + if (pager->is_mapped) { + /* + * All the mappings are gone, so let go of the one extra + * reference that represents all the mappings of this pager. + */ + apple_protect_pager_count_mapped--; + count_unmapped = (apple_protect_pager_count - + apple_protect_pager_count_mapped); + if (count_unmapped > apple_protect_pager_count_unmapped_max) { + apple_protect_pager_count_unmapped_max = count_unmapped; + } + pager->is_mapped = FALSE; + apple_protect_pager_deallocate_internal(pager, TRUE); + /* caution: deallocate_internal() released the lock ! */ + } else { + mutex_unlock(&apple_protect_pager_lock); + } + + return KERN_SUCCESS; +} + + +/* + * + */ +apple_protect_pager_t +apple_protect_pager_lookup( + memory_object_t mem_obj) +{ + apple_protect_pager_t pager; + + pager = (apple_protect_pager_t) mem_obj; + assert(pager->pager_ops == &apple_protect_pager_ops); + assert(pager->ref_count > 0); + return pager; +} + +apple_protect_pager_t +apple_protect_pager_create( + vm_object_t backing_object) +{ + apple_protect_pager_t pager, pager2; + memory_object_control_t control; + kern_return_t kr; + + pager = (apple_protect_pager_t) kalloc(sizeof (*pager)); + if (pager == APPLE_PROTECT_PAGER_NULL) { + return APPLE_PROTECT_PAGER_NULL; + } + + /* + * The vm_map call takes both named entry ports and raw memory + * objects in the same parameter. We need to make sure that + * vm_map does not see this object as a named entry port. 
So, + * we reserve the second word in the object for a fake ip_kotype + * setting - that will tell vm_map to use it as a memory object. + */ + pager->pager_ops = &apple_protect_pager_ops; + pager->pager_ikot = IKOT_MEMORY_OBJECT; + pager->is_ready = FALSE;/* not ready until it has a "name" */ + pager->ref_count = 2; /* existence + setup reference */ + pager->is_mapped = FALSE; + pager->pager_control = MEMORY_OBJECT_CONTROL_NULL; + pager->backing_object = backing_object; + vm_object_reference(backing_object); + + mutex_lock(&apple_protect_pager_lock); + /* see if anyone raced us to create a pager for the same object */ + queue_iterate(&apple_protect_pager_queue, + pager2, + apple_protect_pager_t, + pager_queue) { + if (pager2->backing_object == backing_object) { + break; + } + } + if (! queue_end(&apple_protect_pager_queue, + (queue_entry_t) pager2)) { + /* while we hold the lock, transfer our setup ref to winner */ + pager2->ref_count++; + /* we lost the race, down with the loser... */ + mutex_unlock(&apple_protect_pager_lock); + vm_object_deallocate(pager->backing_object); + pager->backing_object = VM_OBJECT_NULL; + kfree(pager, sizeof (*pager)); + /* ... 
and go with the winner */ + pager = pager2; + /* let the winner make sure the pager gets ready */ + return pager; + } + + /* enter new pager at the head of our list of pagers */ + queue_enter_first(&apple_protect_pager_queue, + pager, + apple_protect_pager_t, + pager_queue); + apple_protect_pager_count++; + if (apple_protect_pager_count > apple_protect_pager_count_max) { + apple_protect_pager_count_max = apple_protect_pager_count; + } + mutex_unlock(&apple_protect_pager_lock); + + kr = memory_object_create_named((memory_object_t) pager, + 0, + &control); + assert(kr == KERN_SUCCESS); + + mutex_lock(&apple_protect_pager_lock); + /* the new pager is now ready to be used */ + pager->is_ready = TRUE; + mutex_unlock(&apple_protect_pager_lock); + + /* wakeup anyone waiting for this pager to be ready */ + thread_wakeup(&pager->is_ready); + + return pager; +} + +/* + * apple_protect_pager_setup() + * + * Provide the caller with a memory object backed by the provided + * "backing_object" VM object. If such a memory object already exists, + * re-use it, otherwise create a new memory object. 
+ */ +memory_object_t +apple_protect_pager_setup( + vm_object_t backing_object) +{ + apple_protect_pager_t pager; + + mutex_lock(&apple_protect_pager_lock); + + queue_iterate(&apple_protect_pager_queue, + pager, + apple_protect_pager_t, + pager_queue) { + if (pager->backing_object == backing_object) { + break; + } + } + if (queue_end(&apple_protect_pager_queue, + (queue_entry_t) pager)) { + /* no existing pager for this backing object */ + pager = APPLE_PROTECT_PAGER_NULL; + } else { + /* make sure pager doesn't disappear */ + pager->ref_count++; + } + + mutex_unlock(&apple_protect_pager_lock); + + if (pager == APPLE_PROTECT_PAGER_NULL) { + pager = apple_protect_pager_create(backing_object); + if (pager == APPLE_PROTECT_PAGER_NULL) { + return MEMORY_OBJECT_NULL; + } + } + + mutex_lock(&apple_protect_pager_lock); + while (!pager->is_ready) { + thread_sleep_mutex(&pager->is_ready, + &apple_protect_pager_lock, + THREAD_UNINT); + } + mutex_unlock(&apple_protect_pager_lock); + + return (memory_object_t) pager; +} + +void +apple_protect_pager_trim(void) +{ + apple_protect_pager_t pager, prev_pager; + queue_head_t trim_queue; + int num_trim; + int count_unmapped; + + mutex_lock(&apple_protect_pager_lock); + + /* + * We have too many pagers, try and trim some unused ones, + * starting with the oldest pager at the end of the queue. + */ + queue_init(&trim_queue); + num_trim = 0; + + for (pager = (apple_protect_pager_t) + queue_last(&apple_protect_pager_queue); + !queue_end(&apple_protect_pager_queue, + (queue_entry_t) pager); + pager = prev_pager) { + /* get prev elt before we dequeue */ + prev_pager = (apple_protect_pager_t) + queue_prev(&pager->pager_queue); + + if (pager->ref_count == 2 && + pager->is_ready && + !pager->is_mapped) { + /* this pager can be trimmed */ + num_trim++; + /* remove this pager from the main list ... */ + apple_protect_pager_dequeue(pager); + /* ... 
and add it to our trim queue */ + queue_enter_first(&trim_queue, + pager, + apple_protect_pager_t, + pager_queue); + + count_unmapped = (apple_protect_pager_count - + apple_protect_pager_count_mapped); + if (count_unmapped <= apple_protect_pager_cache_limit) { + /* we have enough pagers to trim */ + break; + } + } + } + if (num_trim > apple_protect_pager_num_trim_max) { + apple_protect_pager_num_trim_max = num_trim; + } + apple_protect_pager_num_trim_total += num_trim; + + mutex_unlock(&apple_protect_pager_lock); + + /* terminate the trimmed pagers */ + while (!queue_empty(&trim_queue)) { + queue_remove_first(&trim_queue, + pager, + apple_protect_pager_t, + pager_queue); + pager->pager_queue.next = NULL; + pager->pager_queue.prev = NULL; + assert(pager->ref_count == 2); + /* + * We can't call deallocate_internal() because the pager + * has already been dequeued, but we still need to remove + * a reference. + */ + pager->ref_count--; + apple_protect_pager_terminate_internal(pager); + } +} diff --git a/osfmk/vm/vm_fault.c b/osfmk/vm/vm_fault.c index e2aa4851f..ef81f0886 100644 --- a/osfmk/vm/vm_fault.c +++ b/osfmk/vm/vm_fault.c @@ -1675,7 +1675,7 @@ vm_fault_page( */ vm_object_paging_end(object); - vm_object_collapse(object, offset); + vm_object_collapse(object, offset, TRUE); vm_object_paging_begin(object); } @@ -2151,10 +2151,8 @@ vm_fault( unsigned int cache_attr; int write_startup_file = 0; boolean_t need_activation; - vm_prot_t full_fault_type; + vm_prot_t original_fault_type; - if (get_preemption_level() != 0) - return (KERN_FAILURE); KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, 0)) | DBG_FUNC_START, vaddr, @@ -2163,13 +2161,15 @@ vm_fault( 0, 0); - /* at present we do not fully check for execute permission */ - /* we generally treat it is read except in certain device */ - /* memory settings */ - full_fault_type = fault_type; - if(fault_type & VM_PROT_EXECUTE) { - fault_type &= ~VM_PROT_EXECUTE; - fault_type |= VM_PROT_READ; + if (get_preemption_level() != 
0) { + KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, 0)) | DBG_FUNC_END, + vaddr, + 0, + KERN_FAILURE, + 0, + 0); + + return (KERN_FAILURE); } interruptible_state = thread_interrupt_level(interruptible); @@ -2184,12 +2184,15 @@ vm_fault( VM_STAT(faults++); current_task()->faults++; + original_fault_type = fault_type; + RetryFault: ; /* * Find the backing store object and offset into * it to begin the search. */ + fault_type = original_fault_type; map = original_map; vm_map_lock_read(map); kr = vm_map_lookup_locked(&map, vaddr, fault_type, &version, @@ -2367,8 +2370,6 @@ vm_fault( FastMapInFault: m->busy = TRUE; - vm_object_paging_begin(object); - FastPmapEnter: /* * Check a couple of global reasons to @@ -2438,35 +2439,27 @@ vm_fault( * move active page to back of active * queue. This code doesn't. */ - vm_page_lock_queues(); - if (m->clustered) { vm_pagein_cluster_used++; m->clustered = FALSE; } - m->reference = TRUE; - if (change_wiring) { + vm_page_lock_queues(); + if (wired) vm_page_wire(m); else vm_page_unwire(m); + + vm_page_unlock_queues(); } -#if VM_FAULT_STATIC_CONFIG else { - if ((!m->active && !m->inactive) || ((need_activation == TRUE) && !m->active)) + if ((!m->active && !m->inactive) || ((need_activation == TRUE) && !m->active)) { + vm_page_lock_queues(); vm_page_activate(m); + vm_page_unlock_queues(); + } } -#else - else if (software_reference_bits) { - if (!m->active && !m->inactive) - vm_page_activate(m); - } - else if (!m->active) { - vm_page_activate(m); - } -#endif - vm_page_unlock_queues(); /* * That's it, clean up and return. @@ -2483,12 +2476,14 @@ vm_fault( * normal clustering behavior. 
*/ if (!sequential && !object->private) { + vm_object_paging_begin(object); + write_startup_file = vm_fault_tws_insert(map, real_map, vaddr, object, cur_offset); - } - vm_object_paging_end(object); + vm_object_paging_end(object); + } vm_object_unlock(object); vm_map_unlock_read(map); @@ -2580,8 +2575,7 @@ vm_fault( */ vm_object_paging_end(object); - vm_object_collapse(object, offset); - vm_object_paging_begin(object); + vm_object_collapse(object, offset, TRUE); goto FastPmapEnter; } @@ -2658,9 +2652,6 @@ vm_fault( if (cur_object != object) vm_object_unlock(cur_object); - vm_object_paging_begin(object); - vm_object_unlock(object); - /* * Now zero fill page and map it. * the page is probably going to @@ -2709,7 +2700,6 @@ vm_fault( m->inactive = TRUE; vm_page_inactive_count++; vm_page_unlock_queues(); - vm_object_lock(object); goto FastPmapEnter; } @@ -3002,7 +2992,6 @@ vm_fault( } } else { -#ifndef i386 vm_map_entry_t entry; vm_map_offset_t laddr; vm_map_offset_t ldelta, hdelta; @@ -3012,12 +3001,13 @@ vm_fault( * in the object */ +#ifndef i386 /* While we do not worry about execution protection in */ /* general, certian pages may have instruction execution */ /* disallowed. We will check here, and if not allowed */ /* to execute, we return with a protection failure. 
*/ - if((full_fault_type & VM_PROT_EXECUTE) && + if((fault_type & VM_PROT_EXECUTE) && (!pmap_eligible_for_execute((ppnum_t) (object->shadow_offset >> 12)))) { @@ -3029,6 +3019,7 @@ vm_fault( kr = KERN_PROTECTION_FAILURE; goto done; } +#endif /* !i386 */ if(real_map != map) { vm_map_unlock(real_map); @@ -3069,45 +3060,38 @@ vm_fault( } if(vm_map_lookup_entry(map, laddr, &entry) && - (entry->object.vm_object != NULL) && - (entry->object.vm_object == object)) { + (entry->object.vm_object != NULL) && + (entry->object.vm_object == object)) { + vm_map_offset_t phys_offset; + phys_offset = (entry->object.vm_object->shadow_offset + + entry->offset + + laddr + - entry->vme_start); + phys_offset -= ldelta; if(caller_pmap) { /* Set up a block mapped area */ - pmap_map_block(caller_pmap, + pmap_map_block( + caller_pmap, (addr64_t)(caller_pmap_addr - ldelta), - (((vm_map_offset_t) - (entry->object.vm_object->shadow_offset)) - + entry->offset + - (laddr - entry->vme_start) - - ldelta) >> 12, - ((ldelta + hdelta) >> 12), prot, - (VM_WIMG_MASK & (int)object->wimg_bits), 0); + phys_offset >> 12, + (ldelta + hdelta) >> 12, + prot, + (VM_WIMG_MASK & (int)object->wimg_bits), + 0); } else { /* Set up a block mapped area */ - pmap_map_block(real_map->pmap, - (addr64_t)(vaddr - ldelta), - (((vm_map_offset_t) - (entry->object.vm_object->shadow_offset)) - + entry->offset + - (laddr - entry->vme_start) - ldelta) >> 12, - ((ldelta + hdelta) >> 12), prot, - (VM_WIMG_MASK & (int)object->wimg_bits), 0); + pmap_map_block( + real_map->pmap, + (addr64_t)(vaddr - ldelta), + phys_offset >> 12, + (ldelta + hdelta) >> 12, + prot, + (VM_WIMG_MASK & (int)object->wimg_bits), + 0); } } -#else -#ifdef notyet - if(caller_pmap) { - pmap_enter(caller_pmap, caller_pmap_addr, - object->shadow_offset>>12, prot, 0, TRUE); - } else { - pmap_enter(pmap, vaddr, - object->shadow_offset>>12, prot, 0, TRUE); - } - /* Map it in */ -#endif -#endif } diff --git a/osfmk/vm/vm_init.c b/osfmk/vm/vm_init.c index 
283b07822..0b283b4b7 100644 --- a/osfmk/vm/vm_init.c +++ b/osfmk/vm/vm_init.c @@ -73,7 +73,11 @@ #include #define ZONE_MAP_MIN (12 * 1024 * 1024) -#define ZONE_MAP_MAX (768 * 1024 * 1024) +/* Maximum Zone size is 1G */ +#define ZONE_MAP_MAX (1024 * 1024 * 1024) + +const vm_offset_t vm_min_kernel_address = VM_MIN_KERNEL_ADDRESS; +const vm_offset_t vm_max_kernel_address = VM_MAX_KERNEL_ADDRESS; /* * vm_mem_bootstrap initializes the virtual memory system. @@ -84,7 +88,8 @@ void vm_mem_bootstrap(void) { vm_offset_t start, end; - vm_size_t zsize; + vm_size_t zsizearg; + mach_vm_size_t zsize; /* * Initializes resident memory structures. @@ -104,8 +109,8 @@ vm_mem_bootstrap(void) kmem_init(start, end); pmap_init(); - if (PE_parse_boot_arg("zsize", &zsize)) - zsize = zsize * 1024 * 1024; + if (PE_parse_boot_arg("zsize", &zsizearg)) + zsize = zsizearg * 1024ULL * 1024ULL; else { zsize = sane_size >> 2; /* Get target zone size as 1/4 of physical memory */ } diff --git a/osfmk/vm/vm_kern.c b/osfmk/vm/vm_kern.c index d43706c8d..dfb0a0743 100644 --- a/osfmk/vm/vm_kern.c +++ b/osfmk/vm/vm_kern.c @@ -133,7 +133,7 @@ kmem_alloc_contig( object = vm_object_allocate(map_size); } - kr = vm_map_find_space(map, &map_addr, map_size, map_mask, &entry); + kr = vm_map_find_space(map, &map_addr, map_size, map_mask, 0, &entry); if (KERN_SUCCESS != kr) { vm_object_deallocate(object); return kr; @@ -161,6 +161,7 @@ kmem_alloc_contig( for (i = 0; i < map_size; i += PAGE_SIZE) { m = pages; pages = NEXT_PAGE(m); + *(NEXT_PAGE_PTR(m)) = VM_PAGE_NULL; m->busy = FALSE; vm_page_insert(m, object, offset + i); } @@ -199,6 +200,10 @@ kmem_alloc_contig( * KMA_HERE *addrp is base address, else "anywhere" * KMA_NOPAGEWAIT don't wait for pages if unavailable * KMA_KOBJECT use kernel_object + * KMA_LOMEM support for 32 bit devices in a 64 bit world + * if set and a lomemory pool is available + * grab pages from it... 
this also implies + * KMA_NOPAGEWAIT */ kern_return_t @@ -222,6 +227,12 @@ kernel_memory_allocate( *addrp = 0; return KERN_INVALID_ARGUMENT; } + if (flags & KMA_LOMEM) { + if ( !(flags & KMA_NOPAGEWAIT) ) { + *addrp = 0; + return KERN_INVALID_ARGUMENT; + } + } map_size = vm_map_round_page(size); map_mask = (vm_map_offset_t) mask; @@ -237,12 +248,11 @@ kernel_memory_allocate( object = vm_object_allocate(map_size); } - kr = vm_map_find_space(map, &map_addr, map_size, map_mask, &entry); + kr = vm_map_find_space(map, &map_addr, map_size, map_mask, 0, &entry); if (KERN_SUCCESS != kr) { vm_object_deallocate(object); return kr; } - entry->object.vm_object = object; entry->offset = offset = (object == kernel_object) ? map_addr - VM_MIN_KERNEL_ADDRESS : 0; @@ -254,8 +264,15 @@ kernel_memory_allocate( for (i = 0; i < map_size; i += PAGE_SIZE) { vm_page_t mem; - while (VM_PAGE_NULL == - (mem = vm_page_alloc(object, offset + i))) { + for (;;) { + if (flags & KMA_LOMEM) + mem = vm_page_alloclo(object, offset + i); + else + mem = vm_page_alloc(object, offset + i); + + if (mem != VM_PAGE_NULL) + break; + if (flags & KMA_NOPAGEWAIT) { if (object == kernel_object) vm_object_page_remove(object, offset, offset + i); @@ -383,7 +400,7 @@ kmem_realloc( */ kr = vm_map_find_space(map, &newmapaddr, newmapsize, - (vm_map_offset_t) 0, &newentry); + (vm_map_offset_t) 0, 0, &newentry); if (kr != KERN_SUCCESS) { vm_object_lock(object); for(offset = oldmapsize; diff --git a/osfmk/vm/vm_kern.h b/osfmk/vm/vm_kern.h index 8f18cf793..a8bc5ec11 100644 --- a/osfmk/vm/vm_kern.h +++ b/osfmk/vm/vm_kern.h @@ -77,6 +77,7 @@ extern kern_return_t kernel_memory_allocate( #define KMA_HERE 0x01 #define KMA_NOPAGEWAIT 0x02 #define KMA_KOBJECT 0x04 +#define KMA_LOMEM 0x08 extern kern_return_t kmem_alloc_contig( vm_map_t map, diff --git a/osfmk/vm/vm_map.c b/osfmk/vm/vm_map.c index 020000d67..3f0d8bf31 100644 --- a/osfmk/vm/vm_map.c +++ b/osfmk/vm/vm_map.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2005 Apple 
Computer, Inc. All rights reserved. + * Copyright (c) 2000-2006 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -68,6 +68,7 @@ #include #include #include +#include #include #include @@ -186,11 +187,11 @@ static boolean_t vm_map_fork_copy( vm_map_entry_t *old_entry_p, vm_map_t new_map); -static void vm_map_region_top_walk( +void vm_map_region_top_walk( vm_map_entry_t entry, vm_region_top_info_t top); -static void vm_map_region_walk( +void vm_map_region_walk( vm_map_t map, vm_map_offset_t va, vm_map_entry_t entry, @@ -376,6 +377,91 @@ static vm_map_size_t vm_map_aggressive_enter_max; /* set by bootstrap */ /* Skip acquiring locks if we're in the midst of a kernel core dump */ extern unsigned int not_in_kdp; +#ifdef __i386__ +kern_return_t +vm_map_apple_protected( + vm_map_t map, + vm_map_offset_t start, + vm_map_offset_t end) +{ + boolean_t map_locked; + kern_return_t kr; + vm_map_entry_t map_entry; + memory_object_t protected_mem_obj; + vm_object_t protected_object; + vm_map_offset_t map_addr; + + vm_map_lock_read(map); + map_locked = TRUE; + + /* lookup the protected VM object */ + if (!vm_map_lookup_entry(map, + start, + &map_entry) || + map_entry->vme_end != end || + map_entry->is_sub_map) { + /* that memory is not properly mapped */ + kr = KERN_INVALID_ARGUMENT; + goto done; + } + protected_object = map_entry->object.vm_object; + if (protected_object == VM_OBJECT_NULL) { + /* there should be a VM object here at this point */ + kr = KERN_INVALID_ARGUMENT; + goto done; + } + + /* + * Lookup (and create if necessary) the protected memory object + * matching that VM object. + * If successful, this also grabs a reference on the memory object, + * to guarantee that it doesn't go away before we get a chance to map + * it. 
+ */ + + protected_mem_obj = apple_protect_pager_setup(protected_object); + if (protected_mem_obj == NULL) { + kr = KERN_FAILURE; + goto done; + } + + vm_map_unlock_read(map); + map_locked = FALSE; + + /* map this memory object in place of the current one */ + map_addr = start; + kr = mach_vm_map(map, + &map_addr, + end - start, + (mach_vm_offset_t) 0, + VM_FLAGS_FIXED | VM_FLAGS_OVERWRITE, + (ipc_port_t) protected_mem_obj, + map_entry->offset + (start - map_entry->vme_start), + TRUE, + map_entry->protection, + map_entry->max_protection, + map_entry->inheritance); + assert(map_addr == start); + if (kr == KERN_SUCCESS) { + /* let the pager know that this mem_obj is mapped */ + apple_protect_pager_map(protected_mem_obj); + } + /* + * Release the reference obtained by apple_protect_pager_setup(). + * The mapping (if it succeeded) is now holding a reference on the + * memory object. + */ + memory_object_deallocate(protected_mem_obj); + +done: + if (map_locked) { + vm_map_unlock_read(map); + } + return kr; +} +#endif /* __i386__ */ + + void vm_map_init( void) @@ -734,12 +820,13 @@ vm_map_destroy( VM_MAP_NULL); vm_map_unlock(map); -#ifdef __PPC__ if (map->hdr.nentries!=0) - vm_map_remove_commpage64(map); -#endif /* __PPC__ */ + vm_map_remove_commpage(map); - assert(map->hdr.nentries==0); +// assert(map->hdr.nentries==0); +// if(map->hdr.nentries) { /* (BRINGUP) */ +// panic("vm_map_destroy: hdr.nentries is not 0 (%d) in map %08X\n", map->hdr.nentries, map); +// } if(map->pmap) pmap_destroy(map->pmap); @@ -932,16 +1019,32 @@ void vm_map_swapout(vm_map_t map) /* - * SAVE_HINT: + * SAVE_HINT_MAP_READ: * * Saves the specified entry as the hint for - * future lookups. Performs necessary interlocks. + * future lookups. only a read lock is held on map, + * so make sure the store is atomic... OSCompareAndSwap + * guarantees this... 
also, we don't care if we collide + * and someone else wins and stores their 'hint' */ -#define SAVE_HINT(map,value) \ +#define SAVE_HINT_MAP_READ(map,value) \ +MACRO_BEGIN \ + OSCompareAndSwap((UInt32)((map)->hint), (UInt32)value, (UInt32 *)(&(map)->hint)); \ +MACRO_END + + +/* + * SAVE_HINT_MAP_WRITE: + * + * Saves the specified entry as the hint for + * future lookups. write lock held on map, + * so no one else can be writing or looking + * until the lock is dropped, so it's safe + * to just do an assignment + */ +#define SAVE_HINT_MAP_WRITE(map,value) \ MACRO_BEGIN \ - mutex_lock(&(map)->s_lock); \ (map)->hint = (value); \ - mutex_unlock(&(map)->s_lock); \ MACRO_END /* @@ -967,11 +1070,7 @@ vm_map_lookup_entry( * Start looking either from the head of the * list, or from the hint. */ - if (not_in_kdp) - mutex_lock(&map->s_lock); cur = map->hint; - if (not_in_kdp) - mutex_unlock(&map->s_lock); if (cur == vm_map_to_entry(map)) cur = cur->vme_next; @@ -1015,8 +1114,8 @@ vm_map_lookup_entry( */ *entry = cur; - if (not_in_kdp) - SAVE_HINT(map, cur); + SAVE_HINT_MAP_READ(map, cur); + return(TRUE); } break; @@ -1024,8 +1123,8 @@ vm_map_lookup_entry( cur = cur->vme_next; } *entry = cur->vme_prev; - if (not_in_kdp) - SAVE_HINT(map, *entry); + SAVE_HINT_MAP_READ(map, *entry); + return(FALSE); } @@ -1048,6 +1147,7 @@ vm_map_find_space( vm_map_offset_t *address, /* OUT */ vm_map_size_t size, vm_map_offset_t mask, + int flags, vm_map_entry_t *o_entry) /* OUT */ { register vm_map_entry_t entry, new_entry; @@ -1162,6 +1262,8 @@ vm_map_find_space( new_entry->in_transition = FALSE; new_entry->needs_wakeup = FALSE; + VM_GET_FLAGS_ALIAS(flags, new_entry->alias); + /* * Insert the new entry into the list */ @@ -1173,7 +1275,7 @@ vm_map_find_space( /* * Update the lookup hint */ - SAVE_HINT(map, new_entry); + SAVE_HINT_MAP_WRITE(map, new_entry); *o_entry = new_entry; return(KERN_SUCCESS); @@ -1342,7 +1444,7 @@ vm_map_enter( } VM_GET_FLAGS_ALIAS(flags, alias); - + #define 
RETURN(value) { result = value; goto BailOut; } assert(page_aligned(*address)); @@ -1589,9 +1691,20 @@ vm_map_enter( * LP64todo - for now, we can only allocate 4GB internal objects * because the default pager can't page bigger ones. Remove this * when it can. + * + * XXX FBDP + * The reserved "page zero" in each process's address space can + * be arbitrarily large. Splitting it into separate 4GB objects and + * therefore different VM map entries serves no purpose and just + * slows down operations on the VM map, so let's not split the + * allocation into 4GB chunks if the max protection is NONE. That + * memory should never be accessible, so it will never get to the + * default pager. */ tmp_start = start; - if (object == VM_OBJECT_NULL && size > (vm_map_size_t)VM_MAX_ADDRESS) + if (object == VM_OBJECT_NULL && + size > (vm_map_size_t)VM_MAX_ADDRESS && + max_protection != VM_PROT_NONE) tmp_end = tmp_start + (vm_map_size_t)VM_MAX_ADDRESS; else tmp_end = end; @@ -1602,8 +1715,7 @@ vm_map_enter( VM_BEHAVIOR_DEFAULT, inheritance, 0); new_entry->alias = alias; entry = new_entry; - } while (object == VM_OBJECT_NULL && - tmp_end != end && + } while (tmp_end != end && (tmp_start = tmp_end) && (tmp_end = (end - tmp_end > (vm_map_size_t)VM_MAX_ADDRESS) ? 
tmp_end + (vm_map_size_t)VM_MAX_ADDRESS : end)); @@ -1629,6 +1741,11 @@ vm_map_enter( (!needs_copy) && (size < (128*1024))) { pmap_empty = FALSE; /* pmap won't be empty */ + +#ifdef STACK_ONLY_NX + if (alias != VM_MEMORY_STACK && cur_protection) + cur_protection |= VM_PROT_EXECUTE; +#endif vm_map_pmap_enter(map, start, end, object, offset, cur_protection); } @@ -1736,7 +1853,7 @@ vm_map_enter( #if VM_CPM #ifdef MACH_ASSERT -extern vm_offset_t avail_start, avail_end; +extern pmap_paddr_t avail_start, avail_end; #endif /* @@ -1809,6 +1926,7 @@ vm_map_enter_cpm( for (offset = 0; offset < size; offset += PAGE_SIZE) { m = pages; pages = NEXT_PAGE(m); + *(NEXT_PAGE_PTR(m)) = VM_PAGE_NULL; assert(!m->gobbled); assert(!m->wanted); @@ -1822,7 +1940,7 @@ vm_map_enter_cpm( */ ASSERT_PAGE_DECRYPTED(m); assert(m->busy); - assert(m->phys_page>=avail_start && m->phys_page<=avail_end); + assert(m->phys_page>=(avail_start>>PAGE_SHIFT) && m->phys_page<=(avail_end>>PAGE_SHIFT)); m->busy = FALSE; vm_page_insert(m, cpm_obj, offset); @@ -1968,7 +2086,7 @@ vm_map_enter_cpm( * the specified address; if necessary, * it splits the entry into two. */ -#ifndef i386 +#ifndef NO_NESTED_PMAP #define vm_map_clip_start(map, entry, startaddr) \ MACRO_BEGIN \ vm_map_t VMCS_map; \ @@ -1995,7 +2113,7 @@ MACRO_BEGIN \ } \ UPDATE_FIRST_FREE(VMCS_map, VMCS_map->first_free); \ MACRO_END -#else +#else /* NO_NESTED_PMAP */ #define vm_map_clip_start(map, entry, startaddr) \ MACRO_BEGIN \ vm_map_t VMCS_map; \ @@ -2009,7 +2127,7 @@ MACRO_BEGIN \ } \ UPDATE_FIRST_FREE(VMCS_map, VMCS_map->first_free); \ MACRO_END -#endif +#endif /* NO_NESTED_PMAP */ #define vm_map_copy_clip_start(copy, entry, startaddr) \ MACRO_BEGIN \ @@ -2060,7 +2178,7 @@ _vm_map_clip_start( * the specified address; if necessary, * it splits the entry into two. 
*/ -#ifndef i386 +#ifndef NO_NESTED_PMAP #define vm_map_clip_end(map, entry, endaddr) \ MACRO_BEGIN \ vm_map_t VMCE_map; \ @@ -2087,7 +2205,7 @@ MACRO_BEGIN \ } \ UPDATE_FIRST_FREE(VMCE_map, VMCE_map->first_free); \ MACRO_END -#else +#else /* NO_NESTED_PMAP */ #define vm_map_clip_end(map, entry, endaddr) \ MACRO_BEGIN \ vm_map_t VMCE_map; \ @@ -2101,7 +2219,8 @@ MACRO_BEGIN \ } \ UPDATE_FIRST_FREE(VMCE_map, VMCE_map->first_free); \ MACRO_END -#endif +#endif /* NO_NESTED_PMAP */ + #define vm_map_copy_clip_end(copy, entry, endaddr) \ MACRO_BEGIN \ @@ -2243,9 +2362,9 @@ vm_map_submap( vm_map_offset_t end, vm_map_t submap, vm_map_offset_t offset, -#ifdef i386 +#ifdef NO_NESTED_PMAP __unused -#endif +#endif /* NO_NESTED_PMAP */ boolean_t use_pmap) { vm_map_entry_t entry; @@ -2284,11 +2403,11 @@ vm_map_submap( entry->is_sub_map = TRUE; entry->object.sub_map = submap; vm_map_reference(submap); -#ifndef i386 +#ifndef NO_NESTED_PMAP if ((use_pmap) && (offset == 0)) { /* nest if platform code will allow */ if(submap->pmap == NULL) { - submap->pmap = pmap_create((vm_map_size_t) 0); + submap->pmap = pmap_create((vm_map_size_t) 0, FALSE); if(submap->pmap == PMAP_NULL) { vm_map_unlock(map); return(KERN_NO_SPACE); @@ -2302,10 +2421,9 @@ vm_map_submap( panic("vm_map_submap: pmap_nest failed, rc = %08X\n", result); entry->use_pmap = TRUE; } -#endif -#ifdef i386 +#else /* NO_NESTED_PMAP */ pmap_remove(map->pmap, (addr64_t)start, (addr64_t)end); -#endif +#endif /* NO_NESTED_PMAP */ result = KERN_SUCCESS; } vm_map_unlock(map); @@ -2454,14 +2572,15 @@ vm_map_protect( if(current->is_sub_map && current->use_pmap) { vm_map_offset_t pmap_base_addr; vm_map_offset_t pmap_end_addr; -#ifdef i386 +#ifdef NO_NESTED_PMAP __unused -#endif +#endif /* NO_NESTED_PMAP */ vm_map_entry_t local_entry; + pmap_base_addr = 0xF0000000 & current->vme_start; pmap_end_addr = (pmap_base_addr + 0x10000000) - 1; -#ifndef i386 +#ifndef NO_NESTED_PMAP if(!vm_map_lookup_entry(map, pmap_base_addr, &local_entry)) 
panic("vm_map_protect: nested pmap area is missing"); @@ -2471,21 +2590,29 @@ vm_map_protect( local_entry = local_entry->vme_next; } pmap_unnest(map->pmap, (addr64_t)pmap_base_addr); -#endif +#endif /* NO_NESTED_PMAP */ } if (!(current->protection & VM_PROT_WRITE)) { /* Look one level in we support nested pmaps */ /* from mapped submaps which are direct entries */ /* in our map */ - if(current->is_sub_map && current->use_pmap) { + + vm_prot_t prot; + + prot = current->protection; +#ifdef STACK_ONLY_NX + if (current->alias != VM_MEMORY_STACK && prot) + prot |= VM_PROT_EXECUTE; +#endif + if (current->is_sub_map && current->use_pmap) { pmap_protect(current->object.sub_map->pmap, current->vme_start, current->vme_end, - current->protection); + prot); } else { pmap_protect(map->pmap, current->vme_start, current->vme_end, - current->protection); + prot); } } } @@ -3445,6 +3572,7 @@ vm_map_entry_delete( } + void vm_map_submap_pmap_clean( vm_map_t map, @@ -3589,7 +3717,7 @@ vm_map_delete( * Fix the lookup hint now, rather than each * time through the loop. 
*/ - SAVE_HINT(map, entry->vme_prev); + SAVE_HINT_MAP_WRITE(map, entry->vme_prev); } else { entry = first_entry->vme_next; } @@ -3645,7 +3773,7 @@ vm_map_delete( entry = first_entry->vme_next; } else { entry = first_entry; - SAVE_HINT(map, entry->vme_prev); + SAVE_HINT_MAP_WRITE(map, entry->vme_prev); } last_timestamp = map->timestamp; continue; @@ -3708,7 +3836,7 @@ vm_map_delete( entry = first_entry->vme_next; } else { entry = first_entry; - SAVE_HINT(map, entry->vme_prev); + SAVE_HINT_MAP_WRITE(map, entry->vme_prev); } last_timestamp = map->timestamp; continue; @@ -3746,10 +3874,10 @@ vm_map_delete( (!entry->is_sub_map)); first_entry = first_entry->vme_next; } else { - SAVE_HINT(map, entry->vme_prev); + SAVE_HINT_MAP_WRITE(map, entry->vme_prev); } } else { - SAVE_HINT(map, entry->vme_prev); + SAVE_HINT_MAP_WRITE(map, entry->vme_prev); first_entry = entry; } @@ -3783,9 +3911,10 @@ vm_map_delete( entry->is_sub_map) { if(entry->is_sub_map) { if(entry->use_pmap) { -#ifndef i386 - pmap_unnest(map->pmap, (addr64_t)entry->vme_start); -#endif +#ifndef NO_NESTED_PMAP + pmap_unnest(map->pmap, + (addr64_t)entry->vme_start); +#endif /* NO_NESTED_PMAP */ if((map->mapped) && (map->ref_count)) { /* clean up parent map/maps */ vm_map_submap_pmap_clean( @@ -3811,8 +3940,8 @@ vm_map_delete( VM_PROT_NONE); } else { pmap_remove(map->pmap, - entry->vme_start, - entry->vme_end); + (addr64_t)entry->vme_start, + (addr64_t)entry->vme_end); } } } @@ -3865,7 +3994,7 @@ vm_map_delete( if (!vm_map_lookup_entry(map, s, &entry)){ entry = entry->vme_next; } else { - SAVE_HINT(map, entry->vme_prev); + SAVE_HINT_MAP_WRITE(map, entry->vme_prev); } /* * others can not only allocate behind us, we can @@ -3890,6 +4019,7 @@ vm_map_delete( return KERN_SUCCESS; } + /* * vm_map_remove: * @@ -5140,10 +5270,10 @@ vm_map_copy_overwrite_aligned( if (old_object != VM_OBJECT_NULL) { if(entry->is_sub_map) { if(entry->use_pmap) { -#ifndef i386 +#ifndef NO_NESTED_PMAP pmap_unnest(dst_map->pmap, - 
entry->vme_start); -#endif + (addr64_t)entry->vme_start); +#endif /* NO_NESTED_PMAP */ if(dst_map->mapped) { /* clean up parent */ /* map/maps */ @@ -5243,6 +5373,10 @@ vm_map_copy_overwrite_aligned( */ if (entry->needs_copy) prot &= ~VM_PROT_WRITE; +#ifdef STACK_ONLY_NX + if (entry->alias != VM_MEMORY_STACK && prot) + prot |= VM_PROT_EXECUTE; +#endif /* It is our policy to require */ /* explicit sync from anyone */ /* writing code and then */ @@ -5260,7 +5394,7 @@ vm_map_copy_overwrite_aligned( if (!m->active && !m->inactive) vm_page_activate(m); vm_page_unlock_queues(); - PAGE_WAKEUP_DONE(m); + PAGE_WAKEUP_DONE(m); } vm_object_paging_end(object); vm_object_unlock(object); @@ -5726,6 +5860,7 @@ vm_map_copyout( register vm_map_offset_t va; vm_object_offset_t offset; register vm_object_t object; + vm_prot_t prot; object = entry->object.vm_object; offset = entry->offset; @@ -5776,8 +5911,12 @@ vm_map_copyout( m->busy = TRUE; vm_object_unlock(object); - - PMAP_ENTER(dst_map->pmap, va, m, entry->protection, + prot = entry->protection; +#ifdef STACK_ONLY_NX + if (entry->alias != VM_MEMORY_STACK && prot) + prot |= VM_PROT_EXECUTE; +#endif + PMAP_ENTER(dst_map->pmap, va, m, prot, ((unsigned int) (m->object->wimg_bits)) & VM_WIMG_MASK, @@ -5838,7 +5977,10 @@ vm_map_copyout( prot = entry->protection; if (entry->needs_copy) prot &= ~VM_PROT_WRITE; - +#ifdef STACK_ONLY_NX + if (entry->alias != VM_MEMORY_STACK && prot) + prot |= VM_PROT_EXECUTE; +#endif PMAP_ENTER(dst_map->pmap, va, m, prot, ((unsigned int) @@ -5873,7 +6015,7 @@ vm_map_copyout( * Update the hints and the map size */ - SAVE_HINT(dst_map, vm_map_copy_last_entry(copy)); + SAVE_HINT_MAP_WRITE(dst_map, vm_map_copy_last_entry(copy)); dst_map->size += size; @@ -6202,6 +6344,13 @@ vm_map_copyin_common( */ if (src_needs_copy && !tmp_entry->needs_copy) { + vm_prot_t prot; + + prot = src_entry->protection & ~VM_PROT_WRITE; +#ifdef STACK_ONLY_NX + if (src_entry->alias != VM_MEMORY_STACK && prot) + prot |= VM_PROT_EXECUTE; 
+#endif vm_object_pmap_protect( src_object, src_offset, @@ -6210,8 +6359,8 @@ vm_map_copyin_common( PMAP_NULL : src_map->pmap), src_entry->vme_start, - src_entry->protection & - ~VM_PROT_WRITE); + prot); + tmp_entry->needs_copy = TRUE; } @@ -6502,7 +6651,7 @@ vm_map_fork_share( object = old_entry->object.vm_object; if (old_entry->is_sub_map) { assert(old_entry->wired_count == 0); -#ifndef i386 +#ifndef NO_NESTED_PMAP if(old_entry->use_pmap) { kern_return_t result; @@ -6514,7 +6663,7 @@ vm_map_fork_share( if(result) panic("vm_map_fork_share: pmap_nest failed!"); } -#endif +#endif /* NO_NESTED_PMAP */ } else if (object == VM_OBJECT_NULL) { object = vm_object_allocate((vm_map_size_t)(old_entry->vme_end - old_entry->vme_start)); @@ -6631,7 +6780,14 @@ vm_map_fork_share( if (!old_entry->needs_copy && (old_entry->protection & VM_PROT_WRITE)) { - if(old_map->mapped) { + vm_prot_t prot; + + prot = old_entry->protection & ~VM_PROT_WRITE; +#ifdef STACK_ONLY_NX + if (old_entry->alias != VM_MEMORY_STACK && prot) + prot |= VM_PROT_EXECUTE; +#endif + if (old_map->mapped) { vm_object_pmap_protect( old_entry->object.vm_object, old_entry->offset, @@ -6639,12 +6795,12 @@ vm_map_fork_share( old_entry->vme_start), PMAP_NULL, old_entry->vme_start, - old_entry->protection & ~VM_PROT_WRITE); + prot); } else { pmap_protect(old_map->pmap, old_entry->vme_start, old_entry->vme_end, - old_entry->protection & ~VM_PROT_WRITE); + prot); } } @@ -6786,7 +6942,9 @@ vm_map_t vm_map_fork( vm_map_t old_map) { - pmap_t new_pmap = pmap_create((vm_map_size_t) 0); + pmap_t new_pmap = pmap_create( + (vm_map_size_t) 0, + task_has_64BitAddr(current_task())); vm_map_t new_map; vm_map_entry_t old_entry; vm_map_size_t new_size = 0, entry_size; @@ -6855,6 +7013,13 @@ vm_map_fork( */ if (src_needs_copy && !old_entry->needs_copy) { + vm_prot_t prot; + + prot = old_entry->protection & ~VM_PROT_WRITE; +#ifdef STACK_ONLY_NX + if (old_entry->alias != VM_MEMORY_STACK && prot) + prot |= VM_PROT_EXECUTE; +#endif 
vm_object_pmap_protect( old_entry->object.vm_object, old_entry->offset, @@ -6865,7 +7030,7 @@ vm_map_fork( ? PMAP_NULL : old_map->pmap), old_entry->vme_start, - old_entry->protection & ~VM_PROT_WRITE); + prot); old_entry->needs_copy = TRUE; } @@ -6953,10 +7118,7 @@ vm_map_lookup_locked( * If the map has an interesting hint, try it before calling * full blown lookup routine. */ - - mutex_lock(&map->s_lock); entry = map->hint; - mutex_unlock(&map->s_lock); if ((entry == vm_map_to_entry(map)) || (vaddr < entry->vme_start) || (vaddr >= entry->vme_end)) { @@ -7133,12 +7295,17 @@ vm_map_lookup_locked( ©_object); copied_slowly = TRUE; } else { - /* set up shadow object */ copy_object = submap_entry->object.vm_object; vm_object_reference(copy_object); submap_entry->object.vm_object->shadowed = TRUE; submap_entry->needs_copy = TRUE; + + prot = submap_entry->protection & ~VM_PROT_WRITE; +#ifdef STACK_ONLY_NX + if (submap_entry->alias != VM_MEMORY_STACK && prot) + prot |= VM_PROT_EXECUTE; +#endif vm_object_pmap_protect( submap_entry->object.vm_object, submap_entry->offset, @@ -7148,8 +7315,7 @@ vm_map_lookup_locked( || map->mapped) ? PMAP_NULL : map->pmap, submap_entry->vme_start, - submap_entry->protection & - ~VM_PROT_WRITE); + prot); } @@ -7223,14 +7389,25 @@ vm_map_lookup_locked( * Check whether this task is allowed to have * this page. 
*/ - prot = entry->protection; + +#ifdef STACK_ONLY_NX + if (entry->alias != VM_MEMORY_STACK && prot) + /* + * HACK -- if not a stack, than allow execution + */ + prot |= VM_PROT_EXECUTE; +#endif if ((fault_type & (prot)) != fault_type) { - if (*real_map != map) { - vm_map_unlock(*real_map); - } - *real_map = map; - return KERN_PROTECTION_FAILURE; + if (*real_map != map) { + vm_map_unlock(*real_map); + } + *real_map = map; + + if ((fault_type & VM_PROT_EXECUTE) && prot) + log_nx_failure((addr64_t)vaddr, prot); + + return KERN_PROTECTION_FAILURE; } /* @@ -7240,7 +7417,7 @@ vm_map_lookup_locked( *wired = (entry->wired_count != 0); if (*wired) - prot = fault_type = entry->protection; + fault_type = prot; /* * If the entry was copy-on-write, we either ... @@ -7866,7 +8043,7 @@ vm_map_region( } } -static void +void vm_map_region_top_walk( vm_map_entry_t entry, vm_region_top_info_t top) @@ -7929,7 +8106,7 @@ vm_map_region_top_walk( } } -static void +void vm_map_region_walk( vm_map_t map, vm_map_offset_t va, @@ -8208,7 +8385,7 @@ vm_map_simplify_entry( this_entry->offset = prev_entry->offset; vm_object_deallocate(prev_entry->object.vm_object); vm_map_entry_dispose(map, prev_entry); - SAVE_HINT(map, this_entry); + SAVE_HINT_MAP_WRITE(map, this_entry); counter(c_vm_map_simplified++); } } @@ -8805,6 +8982,7 @@ vm_map_entry_insert( new_entry->wired_count = wired_count; new_entry->user_wired_count = 0; new_entry->use_pmap = FALSE; + new_entry->alias = 0; /* * Insert the new entry into the list. @@ -8817,7 +8995,7 @@ vm_map_entry_insert( * Update the free space hint and the lookup hint. 
*/ - SAVE_HINT(map, new_entry); + SAVE_HINT_MAP_WRITE(map, new_entry); return new_entry; } @@ -8943,6 +9121,13 @@ vm_map_remap_extract( if (!src_entry->needs_copy && (src_entry->protection & VM_PROT_WRITE)) { + vm_prot_t prot; + + prot = src_entry->protection & ~VM_PROT_WRITE; +#ifdef STACK_ONLY_NX + if (src_entry->alias != VM_MEMORY_STACK && prot) + prot |= VM_PROT_EXECUTE; +#endif if(map->mapped) { vm_object_pmap_protect( src_entry->object.vm_object, @@ -8950,14 +9135,12 @@ vm_map_remap_extract( entry_size, PMAP_NULL, src_entry->vme_start, - src_entry->protection & - ~VM_PROT_WRITE); + prot); } else { pmap_protect(vm_map_pmap(map), - src_entry->vme_start, - src_entry->vme_end, - src_entry->protection & - ~VM_PROT_WRITE); + src_entry->vme_start, + src_entry->vme_end, + prot); } } @@ -9017,6 +9200,13 @@ vm_map_remap_extract( * Handle copy_on_write semantics. */ if (src_needs_copy && !src_entry->needs_copy) { + vm_prot_t prot; + + prot = src_entry->protection & ~VM_PROT_WRITE; +#ifdef STACK_ONLY_NX + if (src_entry->alias != VM_MEMORY_STACK && prot) + prot |= VM_PROT_EXECUTE; +#endif vm_object_pmap_protect(object, offset, entry_size, @@ -9024,8 +9214,7 @@ vm_map_remap_extract( || map->mapped) ? 
PMAP_NULL : map->pmap), src_entry->vme_start, - src_entry->protection & - ~VM_PROT_WRITE); + prot); src_entry->needs_copy = TRUE; } @@ -9167,7 +9356,7 @@ vm_map_remap( { kern_return_t result; vm_map_entry_t entry; - vm_map_entry_t insp_entry; + vm_map_entry_t insp_entry = VM_MAP_ENTRY_NULL; vm_map_entry_t new_entry; struct vm_map_header map_header; @@ -9230,7 +9419,7 @@ vm_map_remap( if (result == KERN_SUCCESS) { target_map->size += size; - SAVE_HINT(target_map, insp_entry); + SAVE_HINT_MAP_WRITE(target_map, insp_entry); } vm_map_unlock(target_map); @@ -9790,13 +9979,20 @@ kern_return_t vm_map_region_replace( } if ((entry->use_pmap) && (new_submap->pmap == NULL)) { - new_submap->pmap = pmap_create((vm_map_size_t) 0); + new_submap->pmap = pmap_create((vm_map_size_t) 0, FALSE); if(new_submap->pmap == PMAP_NULL) { vm_map_unlock(old_submap); vm_map_unlock(target_map); return(KERN_NO_SPACE); } } + + /* + * Mark the new submap as "mapped", so that we get proper + * cleanup of the sub-pmap when we unmap it. + */ + new_submap->mapped = TRUE; + addr = entry->vme_start; vm_map_reference(old_submap); while((entry != vm_map_to_entry(target_map)) && @@ -9821,7 +10017,7 @@ kern_return_t vm_map_region_replace( addr = entry->vme_start; } if(nested_pmap) { -#ifndef i386 +#ifndef NO_NESTED_PMAP pmap_unnest(target_map->pmap, (addr64_t)start); if(target_map->mapped) { vm_map_submap_pmap_clean(target_map, @@ -9830,7 +10026,7 @@ kern_return_t vm_map_region_replace( pmap_nest(target_map->pmap, new_submap->pmap, (addr64_t)start, (addr64_t)start, (uint64_t)(end - start)); -#endif /* i386 */ +#endif /* NO_NESTED_PMAP */ } else { vm_map_submap_pmap_clean(target_map, start, end, old_submap, 0); @@ -10385,91 +10581,212 @@ vm_map_deallocate( vm_map_destroy(map); } -#ifdef __PPC__ /* LP64todo - this whole mechanism is temporary. It should be redone when * the pmap layer can handle 64-bit address spaces. Until then, we trump * up a map entry for the 64-bit commpage above the map's max_offset. 
*/ extern vm_map_t com_region_map64; /* the submap for 64-bit commpage */ -SInt32 commpage64s_in_use = 0; +extern vm_map_t com_region_map32; /* the submap for 32-bit commpage */ + -void -vm_map_commpage64( - vm_map_t map ) +static void +vm_map_commpage( + vm_map_t user_map, + vm_map_t com_region_map, /* com_region_map32 or com_region_map64 */ + vm_map_offset_t base_address, + vm_map_size_t size) { vm_map_entry_t entry; vm_object_t object; - vm_map_lock(map); + vm_map_lock(user_map); /* The commpage is necessarily the last entry in the map. * See if one is already there (not sure if this can happen???) */ - entry = vm_map_last_entry(map); - if (entry != vm_map_to_entry(map)) { - if (entry->vme_end >= (vm_map_offset_t)_COMM_PAGE_BASE_ADDRESS) { - vm_map_unlock(map); + entry = vm_map_last_entry(user_map); + if (entry != vm_map_to_entry(user_map)) { + if (entry->vme_end >= base_address) { + vm_map_unlock(user_map); return; } } - entry = vm_map_first_entry(com_region_map64); /* the 64-bit commpage */ + entry = vm_map_first_entry(com_region_map); object = entry->object.vm_object; vm_object_reference(object); - + /* We bypass vm_map_enter() because we are adding the entry past the * map's max_offset. 
*/ entry = vm_map_entry_insert( - map, - vm_map_last_entry(map), /* insert after last entry */ - _COMM_PAGE_BASE_ADDRESS, - _COMM_PAGE_BASE_ADDRESS+_COMM_PAGE_AREA_USED, + user_map, + vm_map_last_entry(user_map), /* insert after last entry */ + base_address, + base_address + size, object, 0, /* offset */ FALSE, /* needs_copy */ FALSE, /* is_shared */ FALSE, /* in_transition */ - VM_PROT_READ, - VM_PROT_READ, + VM_PROT_READ|VM_PROT_EXECUTE, + VM_PROT_READ|VM_PROT_EXECUTE, VM_BEHAVIOR_DEFAULT, VM_INHERIT_NONE, 1 ); /* wired_count */ - vm_map_unlock(map); - - OSIncrementAtomic(&commpage64s_in_use); + vm_map_unlock(user_map); +} + +#ifdef __i386__ +void +vm_map_commpage32( + vm_map_t map) +{ + vm_map_commpage(map, + com_region_map32, + (vm_map_offset_t) (unsigned) _COMM_PAGE32_BASE_ADDRESS, + (vm_map_size_t) (unsigned) _COMM_PAGE32_AREA_USED); } +#endif /* __i386__ */ + -/* LP64todo - remove this! */ +void +vm_map_commpage64( + vm_map_t map) +{ + + vm_map_commpage(map, + com_region_map64, + (vm_map_offset_t) _COMM_PAGE64_BASE_ADDRESS, + (vm_map_size_t) _COMM_PAGE64_AREA_USED); +} void -vm_map_remove_commpage64( +vm_map_remove_commpage( vm_map_t map ) { vm_map_entry_t entry; - int deleted = 0; while( 1 ) { vm_map_lock(map); entry = vm_map_last_entry(map); + if ((entry == vm_map_to_entry(map)) || - (entry->vme_start < (vm_map_offset_t)_COMM_PAGE_BASE_ADDRESS)) + (entry->vme_start < map->max_offset)) break; /* clearing the wired count isn't strictly correct */ entry->wired_count = 0; vm_map_entry_delete(map,entry); - deleted++; } vm_map_unlock(map); - - if (deleted != 0) - OSDecrementAtomic(&commpage64s_in_use); } -#endif /* __PPC__ */ +void +vm_map_disable_NX(vm_map_t map) +{ + if (map == NULL) + return; + if (map->pmap == NULL) + return; + + pmap_disable_NX(map->pmap); +} + +/* XXX Consider making these constants (VM_MAX_ADDRESS and MACH_VM_MAX_ADDRESS) + * more descriptive. 
+ */ +void +vm_map_set_32bit(vm_map_t map) +{ + map->max_offset = (vm_map_offset_t)VM_MAX_ADDRESS; +} + + +void +vm_map_set_64bit(vm_map_t map) +{ + map->max_offset = (vm_map_offset_t)MACH_VM_MAX_ADDRESS; +} + +vm_map_offset_t +vm_compute_max_offset(unsigned is64) +{ + return (is64 ? (vm_map_offset_t)MACH_VM_MAX_ADDRESS : (vm_map_offset_t)VM_MAX_ADDRESS); +} + +boolean_t +vm_map_has_4GB_pagezero(vm_map_t map) +{ + /* + * XXX FBDP + * We should lock the VM map (for read) here but we can get away + * with it for now because there can't really be any race condition: + * the VM map's min_offset is changed only when the VM map is created + * and when the zero page is established (when the binary gets loaded), + * and this routine gets called only when the task terminates and the + * VM map is being torn down, and when a new map is created via + * load_machfile()/execve(). + */ + return (map->min_offset >= 0x100000000ULL); +} + +void +vm_map_set_4GB_pagezero(vm_map_t map) +{ + pmap_set_4GB_pagezero(map->pmap); +} + +void +vm_map_clear_4GB_pagezero(vm_map_t map) +{ + pmap_clear_4GB_pagezero(map->pmap); +} + +/* + * Raise a VM map's minimum offset. + * To strictly enforce "page zero" reservation. + */ +kern_return_t +vm_map_raise_min_offset( + vm_map_t map, + vm_map_offset_t new_min_offset) +{ + vm_map_entry_t first_entry; + + new_min_offset = vm_map_round_page(new_min_offset); + + vm_map_lock(map); + + if (new_min_offset < map->min_offset) { + /* + * Can't move min_offset backwards, as that would expose + * a part of the address space that was previously, and for + * possibly good reasons, inaccessible. + */ + vm_map_unlock(map); + return KERN_INVALID_ADDRESS; + } + + first_entry = vm_map_first_entry(map); + if (first_entry != vm_map_to_entry(map) && + first_entry->vme_start < new_min_offset) { + /* + * Some memory was already allocated below the new + * minimum offset. It's too late to change it now... 
+ */ + vm_map_unlock(map); + return KERN_NO_SPACE; + } + + map->min_offset = new_min_offset; + + vm_map_unlock(map); + + return KERN_SUCCESS; +} diff --git a/osfmk/vm/vm_map.h b/osfmk/vm/vm_map.h index 05a1ddbbf..02bf60eea 100644 --- a/osfmk/vm/vm_map.h +++ b/osfmk/vm/vm_map.h @@ -404,6 +404,7 @@ extern kern_return_t vm_map_find_space( vm_map_address_t *address, /* OUT */ vm_map_size_t size, vm_map_offset_t mask, + int flags, vm_map_entry_t *o_entry); /* OUT */ /* Lookup map entry containing or the specified address in the given map */ @@ -840,6 +841,31 @@ extern kern_return_t vm_map_copyin_common( vm_map_copy_t *copy_result, /* OUT */ boolean_t use_maxprot); +extern void vm_map_disable_NX( + vm_map_t map); + +extern void vm_map_set_64bit( + vm_map_t map); + +extern void vm_map_set_32bit( + vm_map_t map); + +extern boolean_t vm_map_has_4GB_pagezero( + vm_map_t map); + +extern void vm_map_set_4GB_pagezero( + vm_map_t map); + +extern void vm_map_clear_4GB_pagezero( + vm_map_t map); + +extern kern_return_t vm_map_raise_min_offset( + vm_map_t map, + vm_map_offset_t new_min_offset); + +extern vm_map_offset_t vm_compute_max_offset( + unsigned is64); + /* * Macros to invoke vm_map_copyin_common. vm_map_copyin is the * usual form; it handles a copyin based on the current protection diff --git a/osfmk/vm/vm_object.c b/osfmk/vm/vm_object.c index 5effdae7f..1520fb6d7 100644 --- a/osfmk/vm/vm_object.c +++ b/osfmk/vm/vm_object.c @@ -197,7 +197,7 @@ static zone_t vm_object_zone; /* vm backing store zone */ * memory object (kernel_object) to avoid wasting data structures. */ static struct vm_object kernel_object_store; -__private_extern__ vm_object_t kernel_object = &kernel_object_store; +vm_object_t kernel_object; /* * The submap object is used as a placeholder for vm_map_submap @@ -649,15 +649,22 @@ vm_object_deallocate( if (object->ref_count == 1 && object->shadow != VM_OBJECT_NULL) { /* - * We don't use this VM object anymore. 
We - * would like to collapse it into its parent(s), - * but we don't have any pointers back to these - * parent object(s). + * There's only one reference left on this + * VM object. We can't tell if it's a valid + * one (from a mapping for example) or if this + * object is just part of a possibly stale and + * useless shadow chain. + * We would like to try and collapse it into + * its parent, but we don't have any pointers + * back to this parent object. * But we can try and collapse this object with * its own shadows, in case these are useless * too... + * We can't bypass this object though, since we + * don't know if this last reference on it is + * meaningful or not. */ - vm_object_collapse(object, 0); + vm_object_collapse(object, 0, FALSE); } vm_object_unlock(object); @@ -3286,7 +3293,8 @@ static unsigned long vm_object_collapse_do_bypass = 0; __private_extern__ void vm_object_collapse( register vm_object_t object, - register vm_object_offset_t hint_offset) + register vm_object_offset_t hint_offset, + boolean_t can_bypass) { register vm_object_t backing_object; register unsigned int rcount; @@ -3298,7 +3306,8 @@ vm_object_collapse( vm_object_collapse_calls++; - if (! vm_object_collapse_allowed && ! vm_object_bypass_allowed) { + if (! vm_object_collapse_allowed && + ! (can_bypass && vm_object_bypass_allowed)) { return; } @@ -3481,7 +3490,7 @@ vm_object_collapse( * or permitted, so let's try bypassing it. */ - if (! vm_object_bypass_allowed) { + if (! 
(can_bypass && vm_object_bypass_allowed)) { /* try and collapse the rest of the shadow chain */ if (object != original_object) { vm_object_unlock(object); @@ -3809,7 +3818,7 @@ vm_object_coalesce( /* * Try to collapse the object first */ - vm_object_collapse(prev_object, prev_offset); + vm_object_collapse(prev_object, prev_offset, TRUE); /* * Can't coalesce if pages not mapped to @@ -4349,7 +4358,7 @@ vm_object_populate_with_private( * memory_object_free_from_cache: * * Walk the vm_object cache list, removing and freeing vm_objects - * which are backed by the pager identified by the caller, (pager_id). + * which are backed by the pager identified by the caller, (pager_ops). * Remove up to "count" objects, if there are that may available * in the cache. * @@ -4360,7 +4369,7 @@ vm_object_populate_with_private( __private_extern__ kern_return_t memory_object_free_from_cache( __unused host_t host, - int *pager_id, + memory_object_pager_ops_t pager_ops, int *count) { @@ -4379,7 +4388,8 @@ memory_object_free_from_cache( queue_iterate(&vm_object_cached_list, object, vm_object_t, cached_list) { - if (object->pager && (pager_id == object->pager->pager)) { + if (object->pager && + (pager_ops == object->pager->mo_pager_ops)) { vm_object_lock(object); queue_remove(&vm_object_cached_list, object, vm_object_t, cached_list); @@ -5450,3 +5460,238 @@ MACRO_END return retval; } + + +/* Allow manipulation of individual page state. 
This is actually part of */ +/* the UPL regimen but takes place on the VM object rather than on a UPL */ + +kern_return_t +vm_object_page_op( + vm_object_t object, + vm_object_offset_t offset, + int ops, + ppnum_t *phys_entry, + int *flags) +{ + vm_page_t dst_page; + + vm_object_lock(object); + + if(ops & UPL_POP_PHYSICAL) { + if(object->phys_contiguous) { + if (phys_entry) { + *phys_entry = (ppnum_t) + (object->shadow_offset >> 12); + } + vm_object_unlock(object); + return KERN_SUCCESS; + } else { + vm_object_unlock(object); + return KERN_INVALID_OBJECT; + } + } + if(object->phys_contiguous) { + vm_object_unlock(object); + return KERN_INVALID_OBJECT; + } + + while(TRUE) { + if((dst_page = vm_page_lookup(object,offset)) == VM_PAGE_NULL) { + vm_object_unlock(object); + return KERN_FAILURE; + } + + /* Sync up on getting the busy bit */ + if((dst_page->busy || dst_page->cleaning) && + (((ops & UPL_POP_SET) && + (ops & UPL_POP_BUSY)) || (ops & UPL_POP_DUMP))) { + /* someone else is playing with the page, we will */ + /* have to wait */ + PAGE_SLEEP(object, dst_page, THREAD_UNINT); + continue; + } + + if (ops & UPL_POP_DUMP) { + vm_page_lock_queues(); + + if (dst_page->no_isync == FALSE) + pmap_disconnect(dst_page->phys_page); + vm_page_free(dst_page); + + vm_page_unlock_queues(); + break; + } + + if (flags) { + *flags = 0; + + /* Get the condition of flags before requested ops */ + /* are undertaken */ + + if(dst_page->dirty) *flags |= UPL_POP_DIRTY; + if(dst_page->pageout) *flags |= UPL_POP_PAGEOUT; + if(dst_page->precious) *flags |= UPL_POP_PRECIOUS; + if(dst_page->absent) *flags |= UPL_POP_ABSENT; + if(dst_page->busy) *flags |= UPL_POP_BUSY; + } + + /* The caller should have made a call either contingent with */ + /* or prior to this call to set UPL_POP_BUSY */ + if(ops & UPL_POP_SET) { + /* The protection granted with this assert will */ + /* not be complete. 
If the caller violates the */ + /* convention and attempts to change page state */ + /* without first setting busy we may not see it */ + /* because the page may already be busy. However */ + /* if such violations occur we will assert sooner */ + /* or later. */ + assert(dst_page->busy || (ops & UPL_POP_BUSY)); + if (ops & UPL_POP_DIRTY) dst_page->dirty = TRUE; + if (ops & UPL_POP_PAGEOUT) dst_page->pageout = TRUE; + if (ops & UPL_POP_PRECIOUS) dst_page->precious = TRUE; + if (ops & UPL_POP_ABSENT) dst_page->absent = TRUE; + if (ops & UPL_POP_BUSY) dst_page->busy = TRUE; + } + + if(ops & UPL_POP_CLR) { + assert(dst_page->busy); + if (ops & UPL_POP_DIRTY) dst_page->dirty = FALSE; + if (ops & UPL_POP_PAGEOUT) dst_page->pageout = FALSE; + if (ops & UPL_POP_PRECIOUS) dst_page->precious = FALSE; + if (ops & UPL_POP_ABSENT) dst_page->absent = FALSE; + if (ops & UPL_POP_BUSY) { + dst_page->busy = FALSE; + PAGE_WAKEUP(dst_page); + } + } + + if (dst_page->encrypted) { + /* + * ENCRYPTED SWAP: + * We need to decrypt this encrypted page before the + * caller can access its contents. + * But if the caller really wants to access the page's + * contents, they have to keep the page "busy". + * Otherwise, the page could get recycled or re-encrypted + * at any time. + */ + if ((ops & UPL_POP_SET) && (ops & UPL_POP_BUSY) && + dst_page->busy) { + /* + * The page is stable enough to be accessed by + * the caller, so make sure its contents are + * not encrypted. + */ + vm_page_decrypt(dst_page, 0); + } else { + /* + * The page is not busy, so don't bother + * decrypting it, since anything could + * happen to it between now and when the + * caller wants to access it. + * We should not give the caller access + * to this page. + */ + assert(!phys_entry); + } + } + + if (phys_entry) { + /* + * The physical page number will remain valid + * only if the page is kept busy. + * ENCRYPTED SWAP: make sure we don't let the + * caller access an encrypted page. 
+ */ + assert(dst_page->busy); + assert(!dst_page->encrypted); + *phys_entry = dst_page->phys_page; + } + + break; + } + + vm_object_unlock(object); + return KERN_SUCCESS; + +} + +/* + * vm_object_range_op offers performance enhancement over + * vm_object_page_op for page_op functions which do not require page + * level state to be returned from the call. Page_op was created to provide + * a low-cost alternative to page manipulation via UPLs when only a single + * page was involved. The range_op call establishes the ability in the _op + * family of functions to work on multiple pages where the lack of page level + * state handling allows the caller to avoid the overhead of the upl structures. + */ + +kern_return_t +vm_object_range_op( + vm_object_t object, + vm_object_offset_t offset_beg, + vm_object_offset_t offset_end, + int ops, + int *range) +{ + vm_object_offset_t offset; + vm_page_t dst_page; + + if (object->resident_page_count == 0) { + if (range) { + if (ops & UPL_ROP_PRESENT) + *range = 0; + else + *range = offset_end - offset_beg; + } + return KERN_SUCCESS; + } + vm_object_lock(object); + + if (object->phys_contiguous) { + vm_object_unlock(object); + return KERN_INVALID_OBJECT; + } + + offset = offset_beg; + + while (offset < offset_end) { + dst_page = vm_page_lookup(object, offset); + if (dst_page != VM_PAGE_NULL) { + if (ops & UPL_ROP_DUMP) { + if (dst_page->busy || dst_page->cleaning) { + /* + * someone else is playing with the + * page, we will have to wait + */ + PAGE_SLEEP(object, + dst_page, THREAD_UNINT); + /* + * need to relook the page up since it's + * state may have changed while we slept + * it might even belong to a different object + * at this point + */ + continue; + } + vm_page_lock_queues(); + + if (dst_page->no_isync == FALSE) + pmap_disconnect(dst_page->phys_page); + vm_page_free(dst_page); + + vm_page_unlock_queues(); + } else if (ops & UPL_ROP_ABSENT) + break; + } else if (ops & UPL_ROP_PRESENT) + break; + + offset += PAGE_SIZE; + } 
+ vm_object_unlock(object); + + if (range) + *range = offset - offset_beg; + + return KERN_SUCCESS; +} diff --git a/osfmk/vm/vm_object.h b/osfmk/vm/vm_object.h index 7ef6f55f3..806fc4ef3 100644 --- a/osfmk/vm/vm_object.h +++ b/osfmk/vm/vm_object.h @@ -130,7 +130,7 @@ struct vm_object { * asserted. */ - unsigned int paging_in_progress; + int paging_in_progress; /* The memory object ports are * being used (e.g., for pagein * or pageout) -- don't change @@ -271,6 +271,17 @@ struct vm_object { not_in_use:24; #ifdef UPL_DEBUG queue_head_t uplq; /* List of outstanding upls */ +#ifdef VM_PIP_DEBUG +/* + * Keep track of the stack traces for the first holders + * of a "paging_in_progress" reference for this VM object. + */ +#define VM_PIP_DEBUG_STACK_FRAMES 25 /* depth of each stack trace */ +#define VM_PIP_DEBUG_MAX_REFS 10 /* track that many references */ + struct __pip_backtrace { + void *pip_retaddr[VM_PIP_DEBUG_STACK_FRAMES]; + } pip_holders[VM_PIP_DEBUG_MAX_REFS]; +#endif /* VM_PIP_DEBUG */ #endif /* UPL_DEBUG */ }; @@ -449,7 +460,8 @@ __private_extern__ boolean_t vm_object_shadow( __private_extern__ void vm_object_collapse( vm_object_t object, - vm_object_offset_t offset); + vm_object_offset_t offset, + boolean_t can_bypass); __private_extern__ boolean_t vm_object_copy_quickly( vm_object_t *_object, @@ -560,10 +572,24 @@ __private_extern__ kern_return_t vm_object_populate_with_private( ppnum_t phys_page, vm_size_t size); -__private_extern__ kern_return_t adjust_vm_object_cache( +extern kern_return_t adjust_vm_object_cache( vm_size_t oval, vm_size_t nval); +extern kern_return_t vm_object_page_op( + vm_object_t object, + vm_object_offset_t offset, + int ops, + ppnum_t *phys_entry, + int *flags); + +extern kern_return_t vm_object_range_op( + vm_object_t object, + vm_object_offset_t offset_beg, + vm_object_offset_t offset_end, + int ops, + int *range); + /* * Event waiting handling */ @@ -612,15 +638,30 @@ __private_extern__ kern_return_t adjust_vm_object_cache( /* * 
Routines implemented as macros */ +#ifdef VM_PIP_DEBUG +extern unsigned OSBacktrace(void **bt, unsigned maxAddrs); +#define VM_PIP_DEBUG_BEGIN(object) \ + MACRO_BEGIN \ + if ((object)->paging_in_progress < VM_PIP_DEBUG_MAX_REFS) { \ + int pip = (object)->paging_in_progress; \ + (void) OSBacktrace(&(object)->pip_holders[pip].retaddr[0], \ + VM_PIP_DEBUG_STACK_FRAMES); \ + } \ + MACRO_END +#else /* VM_PIP_DEBUG */ +#define VM_PIP_DEBUG_BEGIN(object) +#endif /* VM_PIP_DEBUG */ #define vm_object_paging_begin(object) \ MACRO_BEGIN \ + assert((object)->paging_in_progress >= 0); \ + VM_PIP_DEBUG_BEGIN((object)); \ (object)->paging_in_progress++; \ MACRO_END #define vm_object_paging_end(object) \ MACRO_BEGIN \ - assert((object)->paging_in_progress != 0); \ + assert((object)->paging_in_progress > 0); \ if (--(object)->paging_in_progress == 0) { \ vm_object_wakeup(object, \ VM_OBJECT_EVENT_PAGING_IN_PROGRESS); \ diff --git a/osfmk/vm/vm_page.h b/osfmk/vm/vm_page.h index d0cfea88e..9b66d6210 100644 --- a/osfmk/vm/vm_page.h +++ b/osfmk/vm/vm_page.h @@ -294,6 +294,14 @@ extern vm_offset_t vm_page_fictitious_addr; extern boolean_t vm_page_deactivate_hint; +// 0 = all pages avail, 1 = disable high mem, 2 = prefer himem +extern int vm_himemory_mode; + +extern ppnum_t vm_lopage_poolend; +extern int vm_lopage_poolsize; +extern uint64_t max_valid_dma_address; + + /* * Prototypes for functions exported by this module. 
*/ @@ -325,6 +333,8 @@ extern int vm_pool_low(void); extern vm_page_t vm_page_grab(void); +extern vm_page_t vm_page_grablo(void); + extern void vm_page_release( vm_page_t page); @@ -335,6 +345,10 @@ extern vm_page_t vm_page_alloc( vm_object_t object, vm_object_offset_t offset); +extern vm_page_t vm_page_alloclo( + vm_object_t object, + vm_object_offset_t offset); + extern void vm_page_init( vm_page_t page, ppnum_t phys_page); diff --git a/osfmk/vm/vm_pageout.c b/osfmk/vm/vm_pageout.c index 0ccb3e1ac..357782704 100644 --- a/osfmk/vm/vm_pageout.c +++ b/osfmk/vm/vm_pageout.c @@ -1063,9 +1063,6 @@ struct flow_control { mach_timespec_t ts; }; -extern kern_return_t sysclk_gettime(mach_timespec_t *); - - void vm_pageout_scan(void) { @@ -1351,7 +1348,9 @@ vm_pageout_scan(void) reset_deadlock_timer: ts.tv_sec = vm_pageout_deadlock_wait / 1000; ts.tv_nsec = (vm_pageout_deadlock_wait % 1000) * 1000 * NSEC_PER_USEC; - sysclk_gettime(&flow_control.ts); + clock_get_system_nanotime( + &flow_control.ts.tv_sec, + (uint32_t *) &flow_control.ts.tv_nsec); ADD_MACH_TIMESPEC(&flow_control.ts, &ts); flow_control.state = FCS_DELAYED; @@ -1360,7 +1359,9 @@ vm_pageout_scan(void) break; case FCS_DELAYED: - sysclk_gettime(&ts); + clock_get_system_nanotime( + &ts.tv_sec, + (uint32_t *) &ts.tv_nsec); if (CMP_MACH_TIMESPEC(&ts, &flow_control.ts) >= 0) { /* @@ -1933,7 +1934,9 @@ vm_pageout_iothread_continue(struct vm_pageout_queue *q) */ if (!object->pager_initialized) - vm_object_collapse(object, (vm_object_offset_t)0); + vm_object_collapse(object, + (vm_object_offset_t) 0, + TRUE); if (!object->pager_initialized) vm_object_pager_create(object); if (!object->pager_initialized) { @@ -2213,6 +2216,7 @@ upl_create( upl->size = 0; upl->map_object = NULL; upl->ref_count = 1; + upl->highest_page = 0; upl_lock_init(upl); #ifdef UPL_DEBUG upl->ubc_alias1 = 0; @@ -2391,7 +2395,7 @@ vm_object_upl_request( *page_list_count = MAX_UPL_TRANSFER; if((!object->internal) && (object->paging_offset != 0)) - 
panic("vm_object_upl_request: vnode object with non-zero paging offset\n"); + panic("vm_object_upl_request: external object with non-zero paging offset\n"); if((cntrl_flags & UPL_COPYOUT_FROM) && (upl_ptr == NULL)) { return KERN_SUCCESS; @@ -2499,6 +2503,7 @@ vm_object_upl_request( (offset + object->shadow_offset)>>PAGE_SHIFT; user_page_list[0].device = TRUE; } + upl->highest_page = (offset + object->shadow_offset + size - 1)>>PAGE_SHIFT; if(page_list_count != NULL) { if (upl->flags & UPL_INTERNAL) { @@ -2811,6 +2816,9 @@ vm_object_upl_request( } } + if (dst_page->phys_page > upl->highest_page) + upl->highest_page = dst_page->phys_page; + if(user_page_list) { user_page_list[entry].phys_addr = dst_page->phys_page; @@ -3123,6 +3131,10 @@ vm_object_upl_request( dst_page->precious = (cntrl_flags & UPL_PRECIOUS) ? TRUE : FALSE; + + if (dst_page->phys_page > upl->highest_page) + upl->highest_page = dst_page->phys_page; + if(user_page_list) { user_page_list[entry].phys_addr = dst_page->phys_page; @@ -3231,7 +3243,7 @@ vm_fault_list_request( int page_list_count, int cntrl_flags) { - int local_list_count; + unsigned int local_list_count; upl_page_info_t *user_page_list; kern_return_t kr; @@ -4690,6 +4702,21 @@ vm_object_iopl_request( */ return KERN_INVALID_VALUE; } + if (vm_lopage_poolsize == 0) + cntrl_flags &= ~UPL_NEED_32BIT_ADDR; + + if (cntrl_flags & UPL_NEED_32BIT_ADDR) { + if ( (cntrl_flags & (UPL_SET_IO_WIRE | UPL_SET_LITE)) != (UPL_SET_IO_WIRE | UPL_SET_LITE)) + return KERN_INVALID_VALUE; + + if (object->phys_contiguous) { + if ((offset + object->shadow_offset) >= (vm_object_offset_t)max_valid_dma_address) + return KERN_INVALID_ADDRESS; + + if (((offset + object->shadow_offset) + size) >= (vm_object_offset_t)max_valid_dma_address) + return KERN_INVALID_ADDRESS; + } + } if (cntrl_flags & UPL_ENCRYPT) { /* @@ -4722,7 +4749,7 @@ vm_object_iopl_request( return KERN_INVALID_ARGUMENT; if((!object->internal) && (object->paging_offset != 0)) - panic("vm_object_upl_request: 
vnode object with non-zero paging offset\n"); + panic("vm_object_upl_request: external object with non-zero paging offset\n"); if(object->phys_contiguous) { /* No paging operations are possible against this memory */ @@ -4790,6 +4817,7 @@ vm_object_iopl_request( (offset + object->shadow_offset)>>PAGE_SHIFT; user_page_list[0].device = TRUE; } + upl->highest_page = (offset + object->shadow_offset + size - 1)>>PAGE_SHIFT; if(page_list_count != NULL) { if (upl->flags & UPL_INTERNAL) { @@ -4959,24 +4987,75 @@ vm_object_iopl_request( ret = (error_code ? error_code: KERN_MEMORY_ERROR); vm_object_lock(object); - for(; offset < dst_offset; - offset += PAGE_SIZE) { - dst_page = vm_page_lookup( - object, offset); - if(dst_page == VM_PAGE_NULL) - panic("vm_object_iopl_request: Wired pages missing. \n"); - vm_page_lock_queues(); - vm_page_unwire(dst_page); - vm_page_unlock_queues(); - VM_STAT(reactivations++); - } - vm_object_unlock(object); - upl_destroy(upl); - return ret; + + goto return_err; } } while ((result != VM_FAULT_SUCCESS) || (result == VM_FAULT_INTERRUPTED)); } + + if ( (cntrl_flags & UPL_NEED_32BIT_ADDR) && + dst_page->phys_page >= (max_valid_dma_address >> PAGE_SHIFT) ) { + vm_page_t low_page; + int refmod; + + /* + * support devices that can't DMA above 32 bits + * by substituting pages from a pool of low address + * memory for any pages we find above the 4G mark + * can't substitute if the page is already wired because + * we don't know whether that physical address has been + * handed out to some other 64 bit capable DMA device to use + */ + if (dst_page->wire_count) { + ret = KERN_PROTECTION_FAILURE; + goto return_err; + } + if (delayed_unlock) { + delayed_unlock = 0; + vm_page_unlock_queues(); + } + low_page = vm_page_grablo(); + + if (low_page == VM_PAGE_NULL) { + ret = KERN_RESOURCE_SHORTAGE; + goto return_err; + } + /* + * from here until the vm_page_replace completes + * we musn't drop the object lock... 
we don't + * want anyone refaulting this page in and using + * it after we disconnect it... we want the fault + * to find the new page being substituted. + */ + refmod = pmap_disconnect(dst_page->phys_page); + + vm_page_copy(dst_page, low_page); + + low_page->reference = dst_page->reference; + low_page->dirty = dst_page->dirty; + + if (refmod & VM_MEM_REFERENCED) + low_page->reference = TRUE; + if (refmod & VM_MEM_MODIFIED) + low_page->dirty = TRUE; + + vm_page_lock_queues(); + vm_page_replace(low_page, object, dst_offset); + /* + * keep the queue lock since we're going to + * need it immediately + */ + delayed_unlock = 1; + + dst_page = low_page; + /* + * vm_page_grablo returned the page marked + * BUSY... we don't need a PAGE_WAKEUP_DONE + * here, because we've never dropped the object lock + */ + dst_page->busy = FALSE; + } if (delayed_unlock == 0) vm_page_lock_queues(); vm_page_wire(dst_page); @@ -5022,6 +5101,9 @@ vm_object_iopl_request( dst_page->dirty = TRUE; alias_page = NULL; + if (dst_page->phys_page > upl->highest_page) + upl->highest_page = dst_page->phys_page; + if (user_page_list) { user_page_list[entry].phys_addr = dst_page->phys_page; @@ -5074,8 +5156,30 @@ vm_object_iopl_request( } return KERN_SUCCESS; + + +return_err: + if (delayed_unlock) + vm_page_unlock_queues(); + + for (; offset < dst_offset; offset += PAGE_SIZE) { + dst_page = vm_page_lookup(object, offset); + + if (dst_page == VM_PAGE_NULL) + panic("vm_object_iopl_request: Wired pages missing. 
\n"); + vm_page_lock_queues(); + vm_page_unwire(dst_page); + vm_page_unlock_queues(); + VM_STAT(reactivations++); + } + vm_object_paging_end(object); + vm_object_unlock(object); + upl_destroy(upl); + + return ret; } + kern_return_t upl_transpose( upl_t upl1, @@ -5251,6 +5355,7 @@ vm_paging_map_object( &page_map_offset, VM_PAGING_NUM_PAGES * PAGE_SIZE, 0, + 0, &map_entry); if (kr != KERN_SUCCESS) { panic("vm_paging_map_object: " @@ -5420,7 +5525,7 @@ vm_paging_unmap_object( int i; #endif /* __ppc__ */ - if ((vm_paging_base_address != 0) && + if ((vm_paging_base_address == 0) && ((start < vm_paging_base_address) || (end > (vm_paging_base_address + (VM_PAGING_NUM_PAGES * PAGE_SIZE))))) { @@ -5845,7 +5950,7 @@ vm_page_decrypt( * be part of a DMA transfer from a driver that expects the memory to * be coherent at this point, we have to flush the data cache. */ - pmap_sync_page_data_phys(page->phys_page); + pmap_sync_page_attributes_phys(page->phys_page); /* * Since the page is not mapped yet, some code might assume that it * doesn't need to invalidate the instruction cache when writing to @@ -5968,18 +6073,16 @@ upl_get_internal_pagelist_offset(void) return sizeof(struct upl); } -void -upl_set_dirty( - upl_t upl) -{ - upl->flags |= UPL_CLEAR_DIRTY; -} - void upl_clear_dirty( - upl_t upl) + upl_t upl, + boolean_t value) { - upl->flags &= ~UPL_CLEAR_DIRTY; + if (value) { + upl->flags |= UPL_CLEAR_DIRTY; + } else { + upl->flags &= ~UPL_CLEAR_DIRTY; + } } @@ -6074,6 +6177,12 @@ vm_countdirtypages(void) } #endif /* MACH_BSD */ +ppnum_t upl_get_highest_page( + upl_t upl) +{ + return upl->highest_page; +} + #ifdef UPL_DEBUG kern_return_t upl_ubc_alias_set(upl_t upl, unsigned int alias1, unsigned int alias2) { diff --git a/osfmk/vm/vm_pageout.h b/osfmk/vm/vm_pageout.h index 72f7549ce..0c54ccc31 100644 --- a/osfmk/vm/vm_pageout.h +++ b/osfmk/vm/vm_pageout.h @@ -79,6 +79,9 @@ extern kern_return_t vm_map_create_upl( unsigned int *count, int *flags); +extern ppnum_t 
upl_get_highest_page( + upl_t upl); + #ifdef MACH_KERNEL_PRIVATE #include @@ -140,6 +143,7 @@ struct upl { upl_size_t size; /* size in bytes of the address space */ vm_offset_t kaddr; /* secondary mapping in kernel */ vm_object_t map_object; + ppnum_t highest_page; #ifdef UPL_DEBUG unsigned int ubc_alias1; unsigned int ubc_alias2; diff --git a/osfmk/vm/vm_protos.h b/osfmk/vm/vm_protos.h index 84726529d..2d6e13c36 100644 --- a/osfmk/vm/vm_protos.h +++ b/osfmk/vm/vm_protos.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2004 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2004-2006 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -57,6 +57,7 @@ extern kern_return_t device_close( */ extern int start_def_pager( char *bs_device); +extern int default_pager_init_flag; /* * osfmk @@ -65,6 +66,8 @@ extern int start_def_pager( /* these should be exported cleanly from OSFMK since BSD needs them */ extern ipc_port_t convert_task_to_port( task_t task); +extern ipc_port_t convert_task_name_to_port( + task_name_t task_name); #endif /* _KERN_IPC_TT_H_ */ #ifndef _IPC_IPC_PORT_H_ extern mach_port_name_t ipc_port_copyout_send( @@ -133,13 +136,22 @@ extern mach_vm_offset_t mach_get_vm_end(vm_map_t); extern vm_offset_t get_vm_start(vm_map_t); extern vm_offset_t get_vm_end(vm_map_t); -#ifdef __PPC__ /* * LP64todo - map in the commpage cleanly and remove these. 
*/ extern void vm_map_commpage64( vm_map_t ); -extern void vm_map_remove_commpage64( vm_map_t ); -#endif /* __PPC__ */ +extern void vm_map_remove_commpage( vm_map_t ); +#ifdef __i386__ +extern void vm_map_commpage32(vm_map_t); +extern kern_return_t vm_map_apple_protected( + vm_map_t map, + vm_map_offset_t start, + vm_map_offset_t end); +extern void apple_protect_pager_bootstrap(void); +extern memory_object_t apple_protect_pager_setup(vm_object_t backing_object); +extern void apple_protect_pager_map(memory_object_t mem_obj); +#endif /* __i386__ */ + /* * bsd @@ -180,6 +192,14 @@ extern memory_object_t vnode_pager_setup( struct vnode *, memory_object_t); extern vm_object_offset_t vnode_pager_get_filesize( struct vnode *); +extern kern_return_t vnode_pager_get_pathname( + struct vnode *vp, + char *pathname, + vm_size_t *length_p); +extern kern_return_t vnode_pager_get_filename( + struct vnode *vp, + char **filename); + #endif /* _VNODE_PAGER_ */ extern void vnode_pager_bootstrap(void); extern kern_return_t @@ -195,6 +215,13 @@ extern kern_return_t vnode_pager_init( extern kern_return_t vnode_pager_get_object_size( memory_object_t, memory_object_offset_t *); +extern kern_return_t vnode_pager_get_object_pathname( + memory_object_t mem_obj, + char *pathname, + vm_size_t *length_p); +extern kern_return_t vnode_pager_get_object_filename( + memory_object_t mem_obj, + char **filename); extern kern_return_t vnode_pager_data_request( memory_object_t, memory_object_offset_t, @@ -233,9 +260,6 @@ extern void vnode_pager_release_from_cache( extern void ubc_unmap( struct vnode *vp); -extern int vnode_pager_workaround; -extern int device_pager_workaround; - extern void dp_memory_object_reference(memory_object_t); extern void dp_memory_object_deallocate(memory_object_t); #ifndef _memory_object_server_ @@ -286,6 +310,8 @@ extern kern_return_t device_pager_data_request(memory_object_t, extern kern_return_t device_pager_data_return(memory_object_t, memory_object_offset_t, vm_size_t, + 
memory_object_offset_t *, + int *, boolean_t, boolean_t, int); @@ -325,6 +351,8 @@ extern int macx_swapinfo( vm_size_t *pagesize_p, boolean_t *encrypted_p); +extern void log_nx_failure(addr64_t vaddr, vm_prot_t prot); + #endif /* _VM_VM_PROTOS_H_ */ #endif /* XNU_KERNEL_PRIVATE */ diff --git a/osfmk/vm/vm_resident.c b/osfmk/vm/vm_resident.c index dfeee81a4..2ba881ad4 100644 --- a/osfmk/vm/vm_resident.c +++ b/osfmk/vm/vm_resident.c @@ -244,6 +244,12 @@ unsigned int vm_page_gobble_count_warning = 0; unsigned int vm_page_purgeable_count = 0; /* # of pages purgeable now */ uint64_t vm_page_purged_count = 0; /* total count of purged pages */ +ppnum_t vm_lopage_poolstart = 0; +ppnum_t vm_lopage_poolend = 0; +int vm_lopage_poolsize = 0; +uint64_t max_valid_dma_address = 0xffffffffffffffffULL; + + /* * Several page replacement parameters are also * shared with this module, so that page allocation @@ -549,6 +555,8 @@ pmap_startup( vm_page_t pages; ppnum_t phys_page; addr64_t tmpaddr; + unsigned int num_of_lopages = 0; + unsigned int last_index; /* * We calculate how many page frames we will have @@ -564,7 +572,6 @@ pmap_startup( /* * Initialize the page frames. */ - for (i = 0, pages_initialized = 0; i < npages; i++) { if (!pmap_next_page(&phys_page)) break; @@ -574,21 +581,65 @@ pmap_startup( pages_initialized++; } + /* + * Check if we want to initialize pages to a known value + */ + fill = 0; /* Assume no fill */ + if (PE_parse_boot_arg("fill", &fillval)) fill = 1; /* Set fill */ + + /* + * if vm_lopage_poolsize is non-zero, than we need to reserve + * a pool of pages whose addresess are less than 4G... this pool + * is used by drivers whose hardware can't DMA beyond 32 bits... 
+ * + * note that I'm assuming that the page list is ascending and + * ordered w/r to the physical address + */ + for (i = 0, num_of_lopages = vm_lopage_poolsize; num_of_lopages && i < pages_initialized; num_of_lopages--, i++) { + vm_page_t m; + + m = &pages[i]; + + if (m->phys_page >= (1 << (32 - PAGE_SHIFT))) + panic("couldn't reserve the lopage pool: not enough lo pages\n"); + + if (m->phys_page < vm_lopage_poolend) + panic("couldn't reserve the lopage pool: page list out of order\n"); + + vm_lopage_poolend = m->phys_page; + + if (vm_lopage_poolstart == 0) + vm_lopage_poolstart = m->phys_page; + else { + if (m->phys_page < vm_lopage_poolstart) + panic("couldn't reserve the lopage pool: page list out of order\n"); + } + + if (fill) + fillPage(m->phys_page, fillval); /* Fill the page with a know value if requested at boot */ + + vm_page_release(m); + } + last_index = i; + + // -debug code remove + if (2 == vm_himemory_mode) { + // free low -> high so high is preferred + for (i = last_index + 1; i <= pages_initialized; i++) { + if(fill) fillPage(pages[i - 1].phys_page, fillval); /* Fill the page with a know value if requested at boot */ + vm_page_release(&pages[i - 1]); + } + } + else + // debug code remove- + /* * Release pages in reverse order so that physical pages * initially get allocated in ascending addresses. This keeps * the devices (which must address physical memory) happy if * they require several consecutive pages. 
*/ - -/* - * Check if we want to initialize pages to a known value - */ - - fill = 0; /* Assume no fill */ - if (PE_parse_boot_arg("fill", &fillval)) fill = 1; /* Set fill */ - - for (i = pages_initialized; i > 0; i--) { + for (i = pages_initialized; i > last_index; i--) { if(fill) fillPage(pages[i - 1].phys_page, fillval); /* Fill the page with a know value if requested at boot */ vm_page_release(&pages[i - 1]); } @@ -806,7 +857,8 @@ vm_page_replace( register vm_object_t object, register vm_object_offset_t offset) { - register vm_page_bucket_t *bucket; + vm_page_bucket_t *bucket; + vm_page_t found_m = VM_PAGE_NULL; VM_PAGE_CHECK(mem); #if DEBUG @@ -832,46 +884,60 @@ vm_page_replace( bucket = &vm_page_buckets[vm_page_hash(object, offset)]; simple_lock(&vm_page_bucket_lock); + if (bucket->pages) { vm_page_t *mp = &bucket->pages; register vm_page_t m = *mp; + do { if (m->object == object && m->offset == offset) { /* - * Remove page from bucket and from object, - * and return it to the free list. + * Remove old page from hash list */ *mp = m->next; - VM_PAGE_REMOVE(m); - m->tabled = FALSE; - m->object = VM_OBJECT_NULL; - m->offset = (vm_object_offset_t) -1; - object->resident_page_count--; - - if (object->purgable == VM_OBJECT_PURGABLE_VOLATILE || - object->purgable == VM_OBJECT_PURGABLE_EMPTY) { - assert(vm_page_purgeable_count > 0); - vm_page_purgeable_count--; - } - - /* - * Return page to the free list. - * Note the page is not tabled now, so this - * won't self-deadlock on the bucket lock. - */ - vm_page_free(m); + found_m = m; break; } mp = &m->next; } while ((m = *mp)); + mem->next = bucket->pages; } else { mem->next = VM_PAGE_NULL; } + /* + * insert new page at head of hash list + */ bucket->pages = mem; + simple_unlock(&vm_page_bucket_lock); + if (found_m) { + /* + * there was already a page at the specified + * offset for this object... 
remove it from + * the object and free it back to the free list + */ + VM_PAGE_REMOVE(found_m); + found_m->tabled = FALSE; + + found_m->object = VM_OBJECT_NULL; + found_m->offset = (vm_object_offset_t) -1; + object->resident_page_count--; + + if (object->purgable == VM_OBJECT_PURGABLE_VOLATILE || + object->purgable == VM_OBJECT_PURGABLE_EMPTY) { + assert(vm_page_purgeable_count > 0); + vm_page_purgeable_count--; + } + + /* + * Return page to the free list. + * Note the page is not tabled now + */ + vm_page_free(found_m); + } /* * Now link into the object's list of backed pages. */ @@ -1034,7 +1100,19 @@ vm_page_lookup( bucket = &vm_page_buckets[vm_page_hash(object, offset)]; + /* + * since we hold the object lock, we are guaranteed that no + * new pages can be inserted into this object... this in turn + * guarantess that the page we're looking for can't exist + * if the bucket it hashes to is currently NULL even when looked + * at outside the scope of the hash bucket lock... this is a + * really cheap optimiztion to avoid taking the lock + */ + if (bucket->pages == VM_PAGE_NULL) { + return (VM_PAGE_NULL); + } simple_lock(&vm_page_bucket_lock); + for (mem = bucket->pages; mem != VM_PAGE_NULL; mem = mem->next) { VM_PAGE_CHECK(mem); if ((mem->object == object) && (mem->offset == offset)) @@ -1336,6 +1414,55 @@ vm_pool_low(void) return( vm_page_free_count < vm_page_free_reserved ); } + + +/* + * this is an interface to support bring-up of drivers + * on platforms with physical memory > 4G... + */ +int vm_himemory_mode = 0; + + +/* + * this interface exists to support hardware controllers + * incapable of generating DMAs with more than 32 bits + * of address on platforms with physical memory > 4G... 
+ */ +unsigned int vm_lopage_free_count = 0; +unsigned int vm_lopage_max_count = 0; +vm_page_t vm_lopage_queue_free = VM_PAGE_NULL; + +vm_page_t +vm_page_grablo(void) +{ + register vm_page_t mem; + unsigned int vm_lopage_alloc_count; + + if (vm_lopage_poolsize == 0) + return (vm_page_grab()); + + mutex_lock(&vm_page_queue_free_lock); + + if ((mem = vm_lopage_queue_free) != VM_PAGE_NULL) { + + vm_lopage_queue_free = (vm_page_t) mem->pageq.next; + mem->pageq.next = NULL; + mem->pageq.prev = NULL; + mem->free = FALSE; + mem->no_isync = TRUE; + + vm_lopage_free_count--; + vm_lopage_alloc_count = (vm_lopage_poolend - vm_lopage_poolstart) - vm_lopage_free_count; + if (vm_lopage_alloc_count > vm_lopage_max_count) + vm_lopage_max_count = vm_lopage_alloc_count; + } + mutex_unlock(&vm_page_queue_free_lock); + + return (mem); +} + + + /* * vm_page_grab: * @@ -1461,36 +1588,46 @@ vm_page_release( assert(mem->object == VM_OBJECT_NULL); assert(mem->pageq.next == NULL && mem->pageq.prev == NULL); - mem->pageq.next = (queue_entry_t) vm_page_queue_free; - vm_page_queue_free = mem; - vm_page_free_count++; - /* - * Check if we should wake up someone waiting for page. - * But don't bother waking them unless they can allocate. - * - * We wakeup only one thread, to prevent starvation. - * Because the scheduling system handles wait queues FIFO, - * if we wakeup all waiting threads, one greedy thread - * can starve multiple niceguy threads. When the threads - * all wakeup, the greedy threads runs first, grabs the page, - * and waits for another page. It will be the first to run - * when the next page is freed. - * - * However, there is a slight danger here. - * The thread we wake might not use the free page. - * Then the other threads could wait indefinitely - * while the page goes unused. To forestall this, - * the pageout daemon will keep making free pages - * as long as vm_page_free_wanted is non-zero. 
- */ + if (mem->phys_page <= vm_lopage_poolend && mem->phys_page >= vm_lopage_poolstart) { + /* + * this exists to support hardware controllers + * incapable of generating DMAs with more than 32 bits + * of address on platforms with physical memory > 4G... + */ + mem->pageq.next = (queue_entry_t) vm_lopage_queue_free; + vm_lopage_queue_free = mem; + vm_lopage_free_count++; + } else { + mem->pageq.next = (queue_entry_t) vm_page_queue_free; + vm_page_queue_free = mem; + vm_page_free_count++; + /* + * Check if we should wake up someone waiting for page. + * But don't bother waking them unless they can allocate. + * + * We wakeup only one thread, to prevent starvation. + * Because the scheduling system handles wait queues FIFO, + * if we wakeup all waiting threads, one greedy thread + * can starve multiple niceguy threads. When the threads + * all wakeup, the greedy threads runs first, grabs the page, + * and waits for another page. It will be the first to run + * when the next page is freed. + * + * However, there is a slight danger here. + * The thread we wake might not use the free page. + * Then the other threads could wait indefinitely + * while the page goes unused. To forestall this, + * the pageout daemon will keep making free pages + * as long as vm_page_free_wanted is non-zero. 
+ */ - if ((vm_page_free_wanted > 0) && - (vm_page_free_count >= vm_page_free_reserved)) { - vm_page_free_wanted--; - thread_wakeup_one((event_t) &vm_page_free_count); + if ((vm_page_free_wanted > 0) && + (vm_page_free_count >= vm_page_free_reserved)) { + vm_page_free_wanted--; + thread_wakeup_one((event_t) &vm_page_free_count); + } } - mutex_unlock(&vm_page_queue_free_lock); } @@ -1568,6 +1705,27 @@ vm_page_alloc( return(mem); } + +vm_page_t +vm_page_alloclo( + vm_object_t object, + vm_object_offset_t offset) +{ + register vm_page_t mem; + +#if DEBUG + _mutex_assert(&object->Lock, MA_OWNED); +#endif + mem = vm_page_grablo(); + if (mem == VM_PAGE_NULL) + return VM_PAGE_NULL; + + vm_page_insert(mem, object, offset); + + return(mem); +} + + counter(unsigned int c_laundry_pages_freed = 0;) int vm_pagein_cluster_unused = 0; diff --git a/osfmk/vm/vm_shared_memory_server.c b/osfmk/vm/vm_shared_memory_server.c index d241dba2e..183d7b0c5 100644 --- a/osfmk/vm/vm_shared_memory_server.c +++ b/osfmk/vm/vm_shared_memory_server.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2006 Apple Computer, Inc. All rights reserved. 
* * @APPLE_LICENSE_HEADER_START@ * @@ -47,11 +47,14 @@ #include #include #include +#include #include #include #include +int shared_region_trace_level = SHARED_REGION_TRACE_ERROR; + #if DEBUG int lsf_debug = 0; int lsf_alloc_debug = 0; @@ -109,6 +112,7 @@ lsf_hash_lookup( static load_struct_t * lsf_hash_delete( + load_struct_t *target_entry, /* optional */ void *file_object, vm_offset_t base_offset, shared_region_task_mappings_t sm_info); @@ -154,6 +158,7 @@ lsf_unload( static void lsf_deallocate( + load_struct_t *target_entry, /* optional */ void *file_object, vm_offset_t base_offset, shared_region_task_mappings_t sm_info, @@ -164,11 +169,6 @@ lsf_deallocate( ((((natural_t)file_object) & 0xffffff) % size) /* Implementation */ -vm_offset_t shared_file_text_region; -vm_offset_t shared_file_data_region; - -ipc_port_t shared_text_region_handle; -ipc_port_t shared_data_region_handle; vm_offset_t shared_file_mapping_array = 0; shared_region_mapping_t default_environment_shared_regions = NULL; @@ -192,7 +192,8 @@ ipc_port_t com_region_handle32 = NULL; ipc_port_t com_region_handle64 = NULL; vm_map_t com_region_map32 = NULL; vm_map_t com_region_map64 = NULL; -vm_size_t com_region_size = _COMM_PAGE_AREA_LENGTH; +vm_size_t com_region_size32 = _COMM_PAGE32_AREA_LENGTH; +vm_size_t com_region_size64 = _COMM_PAGE64_AREA_LENGTH; shared_region_mapping_t com_mapping_resource = NULL; @@ -220,12 +221,33 @@ vm_set_shared_region( task_t task, shared_region_mapping_t shared_region) { + shared_region_mapping_t old_region; + SHARED_REGION_DEBUG(("vm_set_shared_region(task=%p, " - "shared_region=%p)\n", - task, shared_region)); + "shared_region=%p[%x,%x,%x])\n", + task, shared_region, + shared_region ? shared_region->fs_base : 0, + shared_region ? shared_region->system : 0, + shared_region ? 
shared_region->flags : 0)); if (shared_region) { assert(shared_region->ref_count > 0); } + + old_region = task->system_shared_region; + SHARED_REGION_TRACE( + SHARED_REGION_TRACE_INFO, + ("shared_region: %p set_region(task=%p)" + "old=%p[%x,%x,%x], new=%p[%x,%x,%x]\n", + current_thread(), task, + old_region, + old_region ? old_region->fs_base : 0, + old_region ? old_region->system : 0, + old_region ? old_region->flags : 0, + shared_region, + shared_region ? shared_region->fs_base : 0, + shared_region ? shared_region->system : 0, + shared_region ? shared_region->flags : 0)); + task->system_shared_region = shared_region; return KERN_SUCCESS; } @@ -298,7 +320,9 @@ shared_region_mapping_create( vm_offset_t client_base, shared_region_mapping_t *shared_region, vm_offset_t alt_base, - vm_offset_t alt_next) + vm_offset_t alt_next, + int fs_base, + int system) { SHARED_REGION_DEBUG(("shared_region_mapping_create()\n")); *shared_region = (shared_region_mapping_t) @@ -311,8 +335,8 @@ shared_region_mapping_create( shared_region_mapping_lock_init((*shared_region)); (*shared_region)->text_region = text_region; (*shared_region)->text_size = text_size; - (*shared_region)->fs_base = ENV_DEFAULT_ROOT; - (*shared_region)->system = cpu_type(); + (*shared_region)->fs_base = fs_base; + (*shared_region)->system = system; (*shared_region)->data_region = data_region; (*shared_region)->data_size = data_size; (*shared_region)->region_mappings = region_mappings; @@ -367,6 +391,8 @@ shared_region_mapping_info( *next = shared_region->next; shared_region_mapping_unlock(shared_region); + + return KERN_SUCCESS; } /* LP64todo - need 64-bit safe version */ @@ -406,7 +432,7 @@ shared_region_mapping_dealloc_lock( { struct shared_region_task_mappings sm_info; shared_region_mapping_t next = NULL; - int ref_count; + unsigned int ref_count; SHARED_REGION_DEBUG(("shared_region_mapping_dealloc_lock" "(shared_region=%p,%d,%d) ref_count=%d\n", @@ -542,7 +568,7 @@ shared_region_object_create( /* Create a named 
object based on a submap of specified size */ - new_map = vm_map_create(pmap_create(0), 0, size, TRUE); + new_map = vm_map_create(pmap_create(0, FALSE), 0, size, TRUE); user_entry->backing.map = new_map; user_entry->internal = TRUE; user_entry->is_sub_map = TRUE; @@ -563,7 +589,9 @@ shared_region_object_create( /* relevant as the system default flag is not set */ kern_return_t shared_file_create_system_region( - shared_region_mapping_t *shared_region) + shared_region_mapping_t *shared_region, + int fs_base, + int system) { ipc_port_t text_handle; ipc_port_t data_handle; @@ -585,10 +613,15 @@ shared_file_create_system_region( kret)); return kret; } - kret = shared_region_mapping_create(text_handle, - text_size, data_handle, data_size, mapping_array, - GLOBAL_SHARED_TEXT_SEGMENT, shared_region, - SHARED_ALTERNATE_LOAD_BASE, SHARED_ALTERNATE_LOAD_BASE); + kret = shared_region_mapping_create(text_handle, text_size, + data_handle, data_size, + mapping_array, + GLOBAL_SHARED_TEXT_SEGMENT, + shared_region, + SHARED_ALTERNATE_LOAD_BASE, + SHARED_ALTERNATE_LOAD_BASE, + fs_base, + system); if(kret) { SHARED_REGION_DEBUG(("shared_file_create_system_region: " "shared_region_mapping_create failed " @@ -894,14 +927,14 @@ shared_com_boot_time_init(void) /* create com page regions, 1 each for 32 and 64-bit code */ if((kret = shared_region_object_create( - com_region_size, + com_region_size32, &com_region_handle32))) { panic("shared_com_boot_time_init: " "unable to create 32-bit comm page\n"); return; } if((kret = shared_region_object_create( - com_region_size, + com_region_size64, &com_region_handle64))) { panic("shared_com_boot_time_init: " "unable to create 64-bit comm page\n"); @@ -917,9 +950,12 @@ shared_com_boot_time_init(void) /* wrap the com region in its own shared file mapping structure */ /* 64-bit todo: call "shared_region_mapping_create" on com_region_handle64 */ kret = shared_region_mapping_create(com_region_handle32, - com_region_size, NULL, 0, 0, - 
_COMM_PAGE_BASE_ADDRESS, &com_mapping_resource, - 0, 0); + com_region_size32, + NULL, 0, 0, + _COMM_PAGE_BASE_ADDRESS, + &com_mapping_resource, + 0, 0, + ENV_DEFAULT_ROOT, cpu_type()); if (kret) { panic("shared_region_mapping_create failed for commpage"); } @@ -930,6 +966,8 @@ shared_file_boot_time_init( unsigned int fs_base, unsigned int system) { + mach_port_t text_region_handle; + mach_port_t data_region_handle; long text_region_size; long data_region_size; shared_region_mapping_t new_system_region; @@ -940,24 +978,23 @@ shared_file_boot_time_init( fs_base, system)); text_region_size = 0x10000000; data_region_size = 0x10000000; - shared_file_init(&shared_text_region_handle, + shared_file_init(&text_region_handle, text_region_size, - &shared_data_region_handle, + &data_region_handle, data_region_size, &shared_file_mapping_array); - shared_region_mapping_create(shared_text_region_handle, + shared_region_mapping_create(text_region_handle, text_region_size, - shared_data_region_handle, + data_region_handle, data_region_size, shared_file_mapping_array, GLOBAL_SHARED_TEXT_SEGMENT, &new_system_region, SHARED_ALTERNATE_LOAD_BASE, - SHARED_ALTERNATE_LOAD_BASE); + SHARED_ALTERNATE_LOAD_BASE, + fs_base, system); - new_system_region->fs_base = fs_base; - new_system_region->system = system; new_system_region->flags = SHARED_REGION_SYSTEM; /* grab an extra reference for the caller */ @@ -995,8 +1032,7 @@ shared_file_init( vm_offset_t *file_mapping_array) { shared_file_info_t *sf_head; - vm_offset_t table_mapping_address; - int data_table_size; + vm_size_t data_table_size; int hash_size; kern_return_t kret; @@ -1024,14 +1060,13 @@ shared_file_init( data_table_size = data_region_size >> 9; hash_size = data_region_size >> 14; - table_mapping_address = data_region_size - data_table_size; if(shared_file_mapping_array == 0) { vm_map_address_t map_addr; buf_object = vm_object_allocate(data_table_size); if(vm_map_find_space(kernel_map, &map_addr, - data_table_size, 0, &entry) + 
data_table_size, 0, 0, &entry) != KERN_SUCCESS) { panic("shared_file_init: no space"); } @@ -1100,16 +1135,8 @@ shared_file_init( *file_mapping_array = shared_file_mapping_array; } - kret = vm_map(((vm_named_entry_t) - (*data_region_handle)->ip_kobject)->backing.map, - &table_mapping_address, - data_table_size, 0, - SHARED_LIB_ALIAS | VM_FLAGS_FIXED, - sfma_handle, 0, FALSE, - VM_PROT_READ, VM_PROT_READ, VM_INHERIT_NONE); - SHARED_REGION_DEBUG(("shared_file_init() done\n")); - return kret; + return KERN_SUCCESS; } static kern_return_t @@ -1150,8 +1177,10 @@ shared_file_header_init( if (vm_map_wire(kernel_map, hash_cram_address, hash_cram_address + cram_size, VM_PROT_DEFAULT, FALSE) != KERN_SUCCESS) { - printf("shared_file_header_init: " - "No memory for data table\n"); + SHARED_REGION_TRACE( + SHARED_REGION_TRACE_ERROR, + ("shared_region: shared_file_header_init: " + "No memory for data table\n")); return KERN_NO_SPACE; } allocable_hash_pages -= cram_pages; @@ -1325,6 +1354,175 @@ copyin_shared_file( } } +extern void shared_region_dump_file_entry( + int trace_level, + load_struct_t *entry); /* forward */ + +void shared_region_dump_file_entry( + int trace_level, + load_struct_t *entry) +{ + int i; + loaded_mapping_t *mapping; + + if (trace_level > shared_region_trace_level) { + return; + } + printf("shared region: %p: " + "file_entry %p base_address=0x%x file_offset=0x%x " + "%d mappings\n", + current_thread(), entry, + entry->base_address, entry->file_offset, entry->mapping_cnt); + mapping = entry->mappings; + for (i = 0; i < entry->mapping_cnt; i++) { + printf("shared region: %p:\t#%d: " + "offset=0x%x size=0x%x file_offset=0x%x prot=%d\n", + current_thread(), + i, + mapping->mapping_offset, + mapping->size, + mapping->file_offset, + mapping->protection); + mapping = mapping->next; + } +} + +extern void shared_region_dump_mappings( + int trace_level, + struct shared_file_mapping_np *mappings, + int map_cnt, + mach_vm_offset_t base_offset); /* forward */ + +void 
shared_region_dump_mappings( + int trace_level, + struct shared_file_mapping_np *mappings, + int map_cnt, + mach_vm_offset_t base_offset) +{ + int i; + + if (trace_level > shared_region_trace_level) { + return; + } + + printf("shared region: %p: %d mappings base_offset=0x%llx\n", + current_thread(), map_cnt, (uint64_t) base_offset); + for (i = 0; i < map_cnt; i++) { + printf("shared region: %p:\t#%d: " + "addr=0x%llx, size=0x%llx, file_offset=0x%llx, " + "prot=(%d,%d)\n", + current_thread(), + i, + (uint64_t) mappings[i].sfm_address, + (uint64_t) mappings[i].sfm_size, + (uint64_t) mappings[i].sfm_file_offset, + mappings[i].sfm_max_prot, + mappings[i].sfm_init_prot); + } +} + +extern void shared_region_dump_conflict_info( + int trace_level, + vm_map_t map, + vm_map_offset_t offset, + vm_map_size_t size); /* forward */ + +void +shared_region_dump_conflict_info( + int trace_level, + vm_map_t map, + vm_map_offset_t offset, + vm_map_size_t size) +{ + vm_map_entry_t entry; + vm_object_t object; + memory_object_t mem_object; + kern_return_t kr; + char *filename; + + if (trace_level > shared_region_trace_level) { + return; + } + + object = VM_OBJECT_NULL; + + vm_map_lock_read(map); + if (!vm_map_lookup_entry(map, offset, &entry)) { + entry = entry->vme_next; + } + + if (entry != vm_map_to_entry(map)) { + if (entry->is_sub_map) { + printf("shared region: %p: conflict with submap " + "at 0x%llx size 0x%llx !?\n", + current_thread(), + (uint64_t) offset, + (uint64_t) size); + goto done; + } + + object = entry->object.vm_object; + if (object == VM_OBJECT_NULL) { + printf("shared region: %p: conflict with NULL object " + "at 0x%llx size 0x%llx !?\n", + current_thread(), + (uint64_t) offset, + (uint64_t) size); + object = VM_OBJECT_NULL; + goto done; + } + + vm_object_lock(object); + while (object->shadow != VM_OBJECT_NULL) { + vm_object_t shadow; + + shadow = object->shadow; + vm_object_lock(shadow); + vm_object_unlock(object); + object = shadow; + } + + if (object->internal) { 
+ printf("shared region: %p: conflict with anonymous " + "at 0x%llx size 0x%llx\n", + current_thread(), + (uint64_t) offset, + (uint64_t) size); + goto done; + } + if (! object->pager_ready) { + printf("shared region: %p: conflict with uninitialized " + "at 0x%llx size 0x%llx\n", + current_thread(), + (uint64_t) offset, + (uint64_t) size); + goto done; + } + + mem_object = object->pager; + + /* + * XXX FBDP: "!internal" doesn't mean it's a vnode pager... + */ + kr = vnode_pager_get_object_filename(mem_object, + &filename); + if (kr != KERN_SUCCESS) { + filename = NULL; + } + printf("shared region: %p: conflict with '%s' " + "at 0x%llx size 0x%llx\n", + current_thread(), + filename ? filename : "", + (uint64_t) offset, + (uint64_t) size); + } +done: + if (object != VM_OBJECT_NULL) { + vm_object_unlock(object); + } + vm_map_unlock_read(map); +} + /* * map_shared_file: * @@ -1362,6 +1560,11 @@ map_shared_file( if(shared_file_header->hash_init == FALSE) { ret = shared_file_header_init(shared_file_header); if (ret != KERN_SUCCESS) { + SHARED_REGION_TRACE( + SHARED_REGION_TRACE_ERROR, + ("shared_region: %p: map_shared_file: " + "shared_file_header_init() failed kr=0x%x\n", + current_thread(), ret)); mutex_unlock(&shared_file_header->lock); return KERN_NO_SPACE; } @@ -1387,6 +1590,19 @@ map_shared_file( file_mapping = file_entry->mappings; while(file_mapping != NULL) { if(i>=map_cnt) { + SHARED_REGION_TRACE( + SHARED_REGION_TRACE_CONFLICT, + ("shared_region: %p: map_shared_file: " + "already mapped with " + "more than %d mappings\n", + current_thread(), map_cnt)); + shared_region_dump_file_entry( + SHARED_REGION_TRACE_INFO, + file_entry); + shared_region_dump_mappings( + SHARED_REGION_TRACE_INFO, + mappings, map_cnt, base_offset); + mutex_unlock(&shared_file_header->lock); return KERN_INVALID_ARGUMENT; } @@ -1396,12 +1612,37 @@ map_shared_file( mappings[i].sfm_size != file_mapping->size || mappings[i].sfm_file_offset != file_mapping->file_offset || 
mappings[i].sfm_init_prot != file_mapping->protection) { + SHARED_REGION_TRACE( + SHARED_REGION_TRACE_CONFLICT, + ("shared_region: %p: " + "mapping #%d differs\n", + current_thread(), i)); + shared_region_dump_file_entry( + SHARED_REGION_TRACE_INFO, + file_entry); + shared_region_dump_mappings( + SHARED_REGION_TRACE_INFO, + mappings, map_cnt, base_offset); + break; } file_mapping = file_mapping->next; i++; } if(i!=map_cnt) { + SHARED_REGION_TRACE( + SHARED_REGION_TRACE_CONFLICT, + ("shared_region: %p: map_shared_file: " + "already mapped with " + "%d mappings instead of %d\n", + current_thread(), i, map_cnt)); + shared_region_dump_file_entry( + SHARED_REGION_TRACE_INFO, + file_entry); + shared_region_dump_mappings( + SHARED_REGION_TRACE_INFO, + mappings, map_cnt, base_offset); + mutex_unlock(&shared_file_header->lock); return KERN_INVALID_ARGUMENT; } @@ -1428,6 +1669,13 @@ map_shared_file( * requested address too ? */ ret = KERN_FAILURE; + SHARED_REGION_TRACE( + SHARED_REGION_TRACE_CONFLICT, + ("shared_region: %p: " + "map_shared_file: already mapped, " + "would need to slide 0x%llx\n", + current_thread(), + slide)); } else { /* * The file is already mapped at the correct @@ -1652,7 +1900,7 @@ static load_struct_t * lsf_hash_lookup( queue_head_t *hash_table, void *file_object, - vm_offset_t recognizableOffset, + vm_offset_t recognizableOffset, int size, boolean_t regular, boolean_t alternate, @@ -1796,6 +2044,7 @@ lsf_remove_regions_mappings( static load_struct_t * lsf_hash_delete( + load_struct_t *target_entry, /* optional: NULL if not relevant */ void *file_object, vm_offset_t base_offset, shared_region_task_mappings_t sm_info) @@ -1804,8 +2053,8 @@ lsf_hash_delete( shared_file_info_t *shared_file_header; load_struct_t *entry; - LSF_DEBUG(("lsf_hash_delete(file=%p,base=0x%x,sm_info=%p)\n", - file_object, base_offset, sm_info)); + LSF_DEBUG(("lsf_hash_delete(target=%p,file=%p,base=0x%x,sm_info=%p)\n", + target_entry, file_object, base_offset, sm_info)); 
shared_file_header = (shared_file_info_t *)sm_info->region_mappings; @@ -1817,8 +2066,10 @@ lsf_hash_delete( entry = (load_struct_t *)queue_next(&entry->links)) { if((!(sm_info->self)) || ((shared_region_mapping_t) sm_info->self == entry->regions_instance)) { - if ((entry->file_object == (int) file_object) && - (entry->base_address == base_offset)) { + if ((target_entry == NULL || + entry == target_entry) && + (entry->file_object == (int) file_object) && + (entry->base_address == base_offset)) { queue_remove(bucket, entry, load_struct_ptr_t, links); LSF_DEBUG(("lsf_hash_delete: found it\n")); @@ -2223,6 +2474,10 @@ lsf_slide( wiggle_room = base_offset; for (i = (signed) map_cnt - 1; i >= 0; i--) { + if (mappings[i].sfm_size == 0) { + /* nothing to map here... */ + continue; + } if (mappings[i].sfm_init_prot & VM_PROT_COW) { /* copy-on-write mappings are in the data submap */ map = data_map; @@ -2392,6 +2647,7 @@ lsf_map( kern_return_t kr; int i; mach_vm_offset_t original_base_offset; + mach_vm_size_t total_size; /* get the VM object from the file's memory object handle */ file_object = memory_object_control_to_vm_object(file_control); @@ -2414,7 +2670,11 @@ lsf_map( map_cnt, file_object, sm_info, entry)); if (entry == NULL) { - printf("lsf_map: unable to allocate memory\n"); + SHARED_REGION_TRACE( + SHARED_REGION_TRACE_ERROR, + ("shared_region: %p: " + "lsf_map: unable to allocate entry\n", + current_thread())); return KERN_NO_SPACE; } shared_file_available_hash_ele--; @@ -2434,7 +2694,7 @@ lsf_map( tptr = &(entry->mappings); entry->base_address = base_offset; - + total_size = 0; /* establish each requested mapping */ for (i = 0; i < map_cnt; i++) { @@ -2449,8 +2709,20 @@ lsf_map( (((mappings[i].sfm_address + base_offset + mappings[i].sfm_size - 1) & GLOBAL_SHARED_SEGMENT_MASK) != 0x10000000)) { - lsf_unload(file_object, - entry->base_address, sm_info); + SHARED_REGION_TRACE( + SHARED_REGION_TRACE_ERROR, + ("shared_region: %p: lsf_map: " + "RW mapping #%d not in 
segment", + current_thread(), i)); + shared_region_dump_mappings( + SHARED_REGION_TRACE_ERROR, + mappings, map_cnt, base_offset); + + lsf_deallocate(entry, + file_object, + entry->base_address, + sm_info, + TRUE); return KERN_INVALID_ARGUMENT; } } else { @@ -2461,15 +2733,41 @@ lsf_map( ((mappings[i].sfm_address + base_offset + mappings[i].sfm_size - 1) & GLOBAL_SHARED_SEGMENT_MASK)) { - lsf_unload(file_object, - entry->base_address, sm_info); + SHARED_REGION_TRACE( + SHARED_REGION_TRACE_ERROR, + ("shared_region: %p: lsf_map: " + "RO mapping #%d not in segment", + current_thread(), i)); + shared_region_dump_mappings( + SHARED_REGION_TRACE_ERROR, + mappings, map_cnt, base_offset); + + lsf_deallocate(entry, + file_object, + entry->base_address, + sm_info, + TRUE); return KERN_INVALID_ARGUMENT; } } if (!(mappings[i].sfm_init_prot & VM_PROT_ZF) && ((mappings[i].sfm_file_offset + mappings[i].sfm_size) > (file_size))) { - lsf_unload(file_object, entry->base_address, sm_info); + SHARED_REGION_TRACE( + SHARED_REGION_TRACE_ERROR, + ("shared_region: %p: lsf_map: " + "ZF mapping #%d beyond EOF", + current_thread(), i)); + shared_region_dump_mappings(SHARED_REGION_TRACE_ERROR, + mappings, map_cnt, + base_offset); + + + lsf_deallocate(entry, + file_object, + entry->base_address, + sm_info, + TRUE); return KERN_INVALID_ARGUMENT; } target_address = entry->base_address + @@ -2481,7 +2779,13 @@ lsf_map( } region_entry = (vm_named_entry_t) region_handle->ip_kobject; - if (mach_vm_map(region_entry->backing.map, + total_size += mappings[i].sfm_size; + if (mappings[i].sfm_size == 0) { + /* nothing to map... 
*/ + kr = KERN_SUCCESS; + } else { + kr = mach_vm_map( + region_entry->backing.map, &target_address, vm_map_round_page(mappings[i].sfm_size), 0, @@ -2493,8 +2797,18 @@ lsf_map( (VM_PROT_READ|VM_PROT_EXECUTE)), (mappings[i].sfm_max_prot & (VM_PROT_READ|VM_PROT_EXECUTE)), - VM_INHERIT_DEFAULT) != KERN_SUCCESS) { - lsf_unload(file_object, entry->base_address, sm_info); + VM_INHERIT_DEFAULT); + } + if (kr != KERN_SUCCESS) { + vm_offset_t old_base_address; + + old_base_address = entry->base_address; + lsf_deallocate(entry, + file_object, + entry->base_address, + sm_info, + TRUE); + entry = NULL; if (slide_p != NULL) { /* @@ -2503,25 +2817,65 @@ lsf_map( * shared region, so let's try and slide it... */ + SHARED_REGION_TRACE( + SHARED_REGION_TRACE_CONFLICT, + ("shared_region: %p: lsf_map: " + "mapping #%d failed to map, " + "kr=0x%x, sliding...\n", + current_thread(), i, kr)); + shared_region_dump_mappings( + SHARED_REGION_TRACE_INFO, + mappings, map_cnt, base_offset); + shared_region_dump_conflict_info( + SHARED_REGION_TRACE_CONFLICT, + region_entry->backing.map, + (old_base_address + + ((mappings[i].sfm_address) + & region_mask)), + vm_map_round_page(mappings[i].sfm_size)); + /* lookup an appropriate spot */ kr = lsf_slide(map_cnt, mappings, sm_info, &base_offset); if (kr == KERN_SUCCESS) { /* try and map it there ... */ - entry->base_address = base_offset; goto restart_after_slide; } /* couldn't slide ... 
*/ } - + + SHARED_REGION_TRACE( + SHARED_REGION_TRACE_CONFLICT, + ("shared_region: %p: lsf_map: " + "mapping #%d failed to map, " + "kr=0x%x, no sliding\n", + current_thread(), i, kr)); + shared_region_dump_mappings( + SHARED_REGION_TRACE_INFO, + mappings, map_cnt, base_offset); + shared_region_dump_conflict_info( + SHARED_REGION_TRACE_CONFLICT, + region_entry->backing.map, + (old_base_address + + ((mappings[i].sfm_address) + & region_mask)), + vm_map_round_page(mappings[i].sfm_size)); return KERN_FAILURE; } /* record this mapping */ file_mapping = (loaded_mapping_t *)zalloc(lsf_zone); if (file_mapping == NULL) { - lsf_unload(file_object, entry->base_address, sm_info); - printf("lsf_map: unable to allocate memory\n"); + lsf_deallocate(entry, + file_object, + entry->base_address, + sm_info, + TRUE); + SHARED_REGION_TRACE( + SHARED_REGION_TRACE_ERROR, + ("shared_region: %p: " + "lsf_map: unable to allocate mapping\n", + current_thread())); return KERN_NO_SPACE; } shared_file_available_hash_ele--; @@ -2547,14 +2901,20 @@ lsf_map( *slide_p = base_offset - original_base_offset; } - if (sm_info->flags & SHARED_REGION_STANDALONE) { + if ((sm_info->flags & SHARED_REGION_STANDALONE) || + (total_size == 0)) { /* - * We have a standalone and private shared region, so we + * Two cases: + * 1. we have a standalone and private shared region, so we * don't really need to keep the information about each file * and each mapping. Just deallocate it all. + * 2. the total size of the mappings is 0, so nothing at all + * was mapped. Let's not waste kernel resources to describe + * nothing. + * * XXX we still have the hash table, though... 
*/ - lsf_deallocate(file_object, entry->base_address, sm_info, + lsf_deallocate(entry, file_object, entry->base_address, sm_info, FALSE); } @@ -2573,7 +2933,7 @@ lsf_unload( vm_offset_t base_offset, shared_region_task_mappings_t sm_info) { - lsf_deallocate(file_object, base_offset, sm_info, TRUE); + lsf_deallocate(NULL, file_object, base_offset, sm_info, TRUE); } /* @@ -2585,6 +2945,7 @@ lsf_unload( */ static void lsf_deallocate( + load_struct_t *target_entry, void *file_object, vm_offset_t base_offset, shared_region_task_mappings_t sm_info, @@ -2593,11 +2954,15 @@ lsf_deallocate( load_struct_t *entry; loaded_mapping_t *map_ele; loaded_mapping_t *back_ptr; + kern_return_t kr; - LSF_DEBUG(("lsf_deallocate(file=%p,base=0x%x,sm_info=%p,unload=%d)\n", - file_object, base_offset, sm_info, unload)); - entry = lsf_hash_delete(file_object, base_offset, sm_info); - if(entry) { + LSF_DEBUG(("lsf_deallocate(target=%p,file=%p,base=0x%x,sm_info=%p,unload=%d)\n", + target_entry, file_object, base_offset, sm_info, unload)); + entry = lsf_hash_delete(target_entry, + file_object, + base_offset, + sm_info); + if (entry) { map_ele = entry->mappings; while(map_ele != NULL) { if (unload) { @@ -2614,10 +2979,11 @@ lsf_deallocate( region_entry = (vm_named_entry_t) region_handle->ip_kobject; - vm_deallocate(region_entry->backing.map, - (entry->base_address + - map_ele->mapping_offset), - map_ele->size); + kr = vm_deallocate(region_entry->backing.map, + (entry->base_address + + map_ele->mapping_offset), + map_ele->size); + assert(kr == KERN_SUCCESS); } back_ptr = map_ele; map_ele = map_ele->next; @@ -2626,14 +2992,14 @@ lsf_deallocate( back_ptr, back_ptr->mapping_offset, back_ptr->size)); zfree(lsf_zone, back_ptr); - shared_file_available_hash_ele++; + shared_file_available_hash_ele++; } LSF_DEBUG(("lsf_deallocate: freeing entry %p\n", entry)); LSF_ALLOC_DEBUG(("lsf_deallocate: entry=%p", entry)); zfree(lsf_zone, entry); shared_file_available_hash_ele++; } - LSF_DEBUG(("lsf_unload: 
done\n")); + LSF_DEBUG(("lsf_deallocate: done\n")); } /* integer is from 1 to 100 and represents percent full */ diff --git a/osfmk/vm/vm_shared_memory_server.h b/osfmk/vm/vm_shared_memory_server.h index 7d0774825..444821452 100644 --- a/osfmk/vm/vm_shared_memory_server.h +++ b/osfmk/vm/vm_shared_memory_server.h @@ -37,6 +37,7 @@ #include #include +#include #if DEBUG extern int shared_region_debug; @@ -50,8 +51,17 @@ extern int shared_region_debug; #define SHARED_REGION_DEBUG(args) #endif /* DEBUG */ -extern mach_port_t shared_text_region_handle; -extern mach_port_t shared_data_region_handle; +extern int shared_region_trace_level; +#define SHARED_REGION_TRACE_NONE 0 /* no trace */ +#define SHARED_REGION_TRACE_ERROR 1 /* trace abnormal events */ +#define SHARED_REGION_TRACE_CONFLICT 2 /* trace library conflicts */ +#define SHARED_REGION_TRACE_INFO 3 /* trace all events */ +#define SHARED_REGION_TRACE(level, args) \ + MACRO_BEGIN \ + if (level <= shared_region_trace_level) { \ + printf args; \ + } \ + MACRO_END struct shared_region_task_mappings { mach_port_t text_region; @@ -140,7 +150,7 @@ typedef struct shared_region_object_chain *shared_region_object_chain_t; /* address space shared region descriptor */ struct shared_region_mapping { decl_mutex_data(, Lock) /* Synchronization */ - int ref_count; + unsigned int ref_count; unsigned int fs_base; unsigned int system; mach_port_t text_region; @@ -221,7 +231,9 @@ extern kern_return_t shared_region_mapping_create( vm_offset_t client_base, shared_region_mapping_t *shared_region, vm_offset_t alt_base, - vm_offset_t alt_next); + vm_offset_t alt_next, + int fs_base, + int system); extern kern_return_t shared_region_mapping_ref( shared_region_mapping_t shared_region); @@ -267,7 +279,9 @@ __private_extern__ struct load_struct *lsf_remove_regions_mappings_lock( extern unsigned int lsf_mapping_pool_gauge(void); extern kern_return_t shared_file_create_system_region( - shared_region_mapping_t *shared_region); + 
shared_region_mapping_t *shared_region, + int fs_base, + int system); extern void remove_all_shared_regions(void); @@ -291,6 +305,20 @@ extern kern_return_t mach_memory_entry_purgable_control( vm_purgable_t control, int *state); +extern kern_return_t mach_memory_entry_page_op( + ipc_port_t entry_port, + vm_object_offset_t offset, + int ops, + ppnum_t *phys_entry, + int *flags); + +extern kern_return_t mach_memory_entry_range_op( + ipc_port_t entry_port, + vm_object_offset_t offset_beg, + vm_object_offset_t offset_end, + int ops, + int *range); + #endif /* KERNEL_PRIVATE */ #endif /* _VM_SHARED_MEMORY_SERVER_H_ */ diff --git a/osfmk/vm/vm_user.c b/osfmk/vm/vm_user.c index a659f45a9..e100b9c7e 100644 --- a/osfmk/vm/vm_user.c +++ b/osfmk/vm/vm_user.c @@ -2342,6 +2342,11 @@ mach_make_memory_entry_64( shadow_object = map_entry->object.vm_object; vm_object_unlock(object); + prot = map_entry->protection & ~VM_PROT_WRITE; +#ifdef STACK_ONLY_NX + if (map_entry->alias != VM_MEMORY_STACK && prot) + prot |= VM_PROT_EXECUTE; +#endif vm_object_pmap_protect( object, map_entry->offset, total_size, @@ -2350,7 +2355,7 @@ mach_make_memory_entry_64( ? PMAP_NULL : target_map->pmap), map_entry->vme_start, - map_entry->protection & ~VM_PROT_WRITE); + prot); total_size -= (map_entry->vme_end - map_entry->vme_start); next_entry = map_entry->vme_next; @@ -2867,6 +2872,106 @@ mach_destroy_memory_entry( mutex_unlock(&(named_entry)->Lock); } +/* Allow manipulation of individual page state. 
This is actually part of */ +/* the UPL regimen but takes place on the memory entry rather than on a UPL */ + +kern_return_t +mach_memory_entry_page_op( + ipc_port_t entry_port, + vm_object_offset_t offset, + int ops, + ppnum_t *phys_entry, + int *flags) +{ + vm_named_entry_t mem_entry; + vm_object_t object; + kern_return_t kr; + + if (entry_port == IP_NULL || + ip_kotype(entry_port) != IKOT_NAMED_ENTRY) { + return KERN_INVALID_ARGUMENT; + } + + mem_entry = (vm_named_entry_t) entry_port->ip_kobject; + + named_entry_lock(mem_entry); + + if (mem_entry->is_sub_map || mem_entry->is_pager) { + named_entry_unlock(mem_entry); + return KERN_INVALID_ARGUMENT; + } + + object = mem_entry->backing.object; + if (object == VM_OBJECT_NULL) { + named_entry_unlock(mem_entry); + return KERN_INVALID_ARGUMENT; + } + + vm_object_reference(object); + named_entry_unlock(mem_entry); + + kr = vm_object_page_op(object, offset, ops, phys_entry, flags); + + vm_object_deallocate(object); + + return kr; +} + +/* + * mach_memory_entry_range_op offers performance enhancement over + * mach_memory_entry_page_op for page_op functions which do not require page + * level state to be returned from the call. Page_op was created to provide + * a low-cost alternative to page manipulation via UPLs when only a single + * page was involved. The range_op call establishes the ability in the _op + * family of functions to work on multiple pages where the lack of page level + * state handling allows the caller to avoid the overhead of the upl structures. 
+ */ + +kern_return_t +mach_memory_entry_range_op( + ipc_port_t entry_port, + vm_object_offset_t offset_beg, + vm_object_offset_t offset_end, + int ops, + int *range) +{ + vm_named_entry_t mem_entry; + vm_object_t object; + kern_return_t kr; + + if (entry_port == IP_NULL || + ip_kotype(entry_port) != IKOT_NAMED_ENTRY) { + return KERN_INVALID_ARGUMENT; + } + + mem_entry = (vm_named_entry_t) entry_port->ip_kobject; + + named_entry_lock(mem_entry); + + if (mem_entry->is_sub_map || mem_entry->is_pager) { + named_entry_unlock(mem_entry); + return KERN_INVALID_ARGUMENT; + } + + object = mem_entry->backing.object; + if (object == VM_OBJECT_NULL) { + named_entry_unlock(mem_entry); + return KERN_INVALID_ARGUMENT; + } + + vm_object_reference(object); + named_entry_unlock(mem_entry); + + kr = vm_object_range_op(object, + offset_beg, + offset_end, + ops, + range); + + vm_object_deallocate(object); + + return kr; +} kern_return_t diff --git a/pexpert/conf/Makefile.i386 b/pexpert/conf/Makefile.i386 index f807e9ecb..895c8b5fe 100644 --- a/pexpert/conf/Makefile.i386 +++ b/pexpert/conf/Makefile.i386 @@ -10,6 +10,7 @@ CWARNFLAGS= $(filter-out -Wbad-function-cast, $(CWARNFLAGS_STD)) OBJS_NO_WERROR= \ ioconf.o \ bootargs.o \ + pe_init.o \ device_tree.o OBJS_WERROR=$(filter-out $(OBJS_NO_WERROR),$(OBJS)) diff --git a/pexpert/gen/bootargs.c b/pexpert/gen/bootargs.c index 09272e238..558175349 100644 --- a/pexpert/gen/bootargs.c +++ b/pexpert/gen/bootargs.c @@ -165,8 +165,8 @@ getval( char *s, int *val) { - register unsigned radix, intval; - register unsigned char c; + unsigned int radix, intval; + char c; int sign = 1; if (*s == '=') { @@ -175,7 +175,7 @@ getval( sign = -1, s++; intval = *s++-'0'; radix = 10; - if (intval == 0) + if (intval == 0) { switch(*s) { case 'x': @@ -199,28 +199,44 @@ getval( if (!isargsep(*s)) return (STR); } + } else if (intval >= radix) { + return (STR); + } for(;;) { - if (((c = *s++) >= '0') && (c <= '9')) + c = *s++; + if (isargsep(c)) + break; + if ((radix 
<= 10) && + ((c >= '0') && (c <= ('9' - (10 - radix))))) { + c -= '0'; + } else if ((radix == 16) && + ((c >= '0') && (c <= '9'))) { c -= '0'; - else if ((c >= 'a') && (c <= 'f')) + } else if ((radix == 16) && + ((c >= 'a') && (c <= 'f'))) { c -= 'a' - 10; - else if ((c >= 'A') && (c <= 'F')) + } else if ((radix == 16) && + ((c >= 'A') && (c <= 'F'))) { c -= 'A' - 10; - else if (c == 'k' || c == 'K') - { sign *= 1024; break; } - else if (c == 'm' || c == 'M') - { sign *= 1024 * 1024; break; } - else if (c == 'g' || c == 'G') - { sign *= 1024 * 1024 * 1024; break; } - else if (isargsep(c)) + } else if (c == 'k' || c == 'K') { + sign *= 1024; break; - else + } else if (c == 'm' || c == 'M') { + sign *= 1024 * 1024; + break; + } else if (c == 'g' || c == 'G') { + sign *= 1024 * 1024 * 1024; + break; + } else { return (STR); + } if (c >= radix) return (STR); intval *= radix; intval += c; } + if (!isargsep(c) && !isargsep(*s)) + return STR; *val = intval * sign; return (NUM); } diff --git a/pexpert/i386/kd.c b/pexpert/i386/kd.c index a01e99c12..147c1376a 100644 --- a/pexpert/i386/kd.c +++ b/pexpert/i386/kd.c @@ -81,8 +81,6 @@ extern void cpu_shutdown(void); -int cngetc(void); -int cnmaygetc(void); void kdreboot(void); /* @@ -116,35 +114,6 @@ void kdreboot(void); #define K_CMD_ECHO 0xee /* used for diagnostic testing */ #define K_CMD_RESET 0xfe /* issue a system reset */ -/* - * cngetc / cnmaygetc - * - * Get one character using polling, rather than interrupts. - * Used by the kernel debugger. 
- */ - -int -cngetc(void) -{ - char c; - - if ( 0 == (*PE_poll_input)(0, &c) ) - return ( c ); - else - return ( 0 ); -} - -int -cnmaygetc(void) -{ - char c; - - if ( 0 == (*PE_poll_input)(0, &c) ) - return ( c ); - else - return ( 0 ); -} - /* * kd_sendcmd * diff --git a/pexpert/i386/pe_bootargs.c b/pexpert/i386/pe_bootargs.c index 5f17b84c1..d5c1be3b5 100644 --- a/pexpert/i386/pe_bootargs.c +++ b/pexpert/i386/pe_bootargs.c @@ -26,5 +26,5 @@ char * PE_boot_args( void) { - return((char *)((KernelBootArgs_t *)PE_state.bootArgs)->bootString); + return ((boot_args *)PE_state.bootArgs)->CommandLine; } diff --git a/pexpert/i386/pe_identify_machine.c b/pexpert/i386/pe_identify_machine.c index d66e70ace..fe013d819 100644 --- a/pexpert/i386/pe_identify_machine.c +++ b/pexpert/i386/pe_identify_machine.c @@ -23,9 +23,6 @@ #include #include -/* External declarations */ -unsigned int LockTimeOut = 1250000000; /* XXX - Need real value for i386 */ - /* Local declarations */ void pe_identify_machine(boot_args *args); diff --git a/pexpert/i386/pe_init.c b/pexpert/i386/pe_init.c index 89d90db7f..82247037d 100644 --- a/pexpert/i386/pe_init.c +++ b/pexpert/i386/pe_init.c @@ -31,6 +31,7 @@ #include #include #include +#include #include "fakePPCStructs.h" #include "fakePPCDeviceTree.h" @@ -40,9 +41,6 @@ extern void pe_identify_machine(void * args); extern void initialize_screen(void *, unsigned int); -/* Local references */ -static int PE_fb_mode = TEXT_MODE; - /* private globals */ PE_state_t PE_state; dt_data gMemoryMapNode; @@ -53,6 +51,9 @@ dt_data gCompatibleProp; /* Clock Frequency Info */ clock_frequency_info_t gPEClockFrequencyInfo; +void *gPEEFISystemTable = 0; +void *gPEEFIRuntimeServices = 0; + int PE_initialize_console( PE_Video * info, int op ) { static int last_console = -1; @@ -72,7 +73,7 @@ int PE_initialize_console( PE_Video * info, int op ) bootInfo.v_depth = info->v_depth; bInfo = &bootInfo; if (info == &PE_state.video) { - bootInfo.v_display = PE_fb_mode; + 
bootInfo.v_display = info->v_display; } else { bootInfo.v_display = GRAPHICS_MODE; } @@ -84,19 +85,16 @@ int PE_initialize_console( PE_Video * info, int op ) case kPEDisableScreen: initialize_screen((void *) bInfo, op); -#ifdef FIXME - last_console = switch_to_serial_console(); -#endif kprintf("kPEDisableScreen %d\n", last_console); + if (!console_is_serial()) + last_console = switch_to_serial_console(); break; case kPEEnableScreen: initialize_screen((void *) bInfo, op); kprintf("kPEEnableScreen %d\n", last_console); -#ifdef FIXME if( last_console != -1) switch_to_old_console( last_console); -#endif break; default: @@ -109,11 +107,7 @@ int PE_initialize_console( PE_Video * info, int op ) void PE_init_iokit(void) { - long * dt; - int i; - KernelBootArgs_t *kap = (KernelBootArgs_t *)PE_state.bootArgs; enum { kMaxBootVar = 128 }; - char *rdValue, *platformValue; typedef struct { char name[32]; @@ -121,98 +115,50 @@ void PE_init_iokit(void) unsigned long value[2]; } DriversPackageProp; + boolean_t bootClutInitialized = FALSE; + boolean_t norootInitialized = FALSE; + DTEntry entry; + int size; + void ** map; + PE_init_kprintf(TRUE); PE_init_printf(TRUE); + kprintf("Kernel boot args: '%s'\n", PE_boot_args()); + /* - * Update the fake device tree with the driver information provided by - * the booter. + * Fetch the CLUT and the noroot image. */ + boot_progress_element * bootPict; - gDriversProp.length = kap->numBootDrivers * sizeof(DriversPackageProp); - gMemoryMapNode.length = 2 * sizeof(long); - - rdValue = kalloc(kMaxBootVar); - if ( PE_parse_boot_arg("rd", rdValue) ) { - if (*rdValue == '*') { - gRootpathProp.address = (rdValue + 1); - } else { - gRootpathProp.address = rdValue; - } - strcat(rdValue, ","); - } else { - gRootpathProp.address = rdValue; - rdValue[0] = '\0'; - } - strcat(rdValue, kap->bootFile); - gRootpathProp.length = strlen(rdValue) + 1; - - platformValue = kalloc(kMaxBootVar); - if ( ! 
PE_parse_boot_arg("platform", platformValue) ) { - strcpy(platformValue, kDefaultPlatformName); - } - gCompatibleProp.address = platformValue; - gCompatibleProp.length = strlen(platformValue) + 1; - - dt = (long *) createdt( fakePPCDeviceTree, - &((boot_args*)PE_state.fakePPCBootArgs)->deviceTreeLength ); - - kfree(rdValue, kMaxBootVar); - kfree(platformValue, kMaxBootVar); - - - if ( dt ) - { - DriversPackageProp * prop = (DriversPackageProp *) gDriversProp.address; - - /* Copy driver info in kernBootStruct to fake device tree */ - - for ( i = 0; i < kap->numBootDrivers; i++, prop++ ) - { - switch ( kap->driverConfig[i].type ) - { - case kBootDriverTypeKEXT: - sprintf(prop->name, "Driver-%lx", kap->driverConfig[i].address); - break; - - case kBootDriverTypeMKEXT: - sprintf(prop->name, "DriversPackage-%lx", kap->driverConfig[i].address); - break; - - default: - sprintf(prop->name, "DriverBogus-%lx", kap->driverConfig[i].address); - break; - } - prop->length = sizeof(prop->value); - prop->value[0] = kap->driverConfig[i].address; - prop->value[1] = kap->driverConfig[i].size; + if( kSuccess == DTLookupEntry(0, "/chosen/memory-map", &entry)) { + if( kSuccess == DTGetProperty(entry, "BootCLUT", (void **) &map, &size)) { + bcopy( map[0], appleClut8, sizeof(appleClut8) ); + bootClutInitialized = TRUE; } - *((long *)gMemoryMapNode.address) = kap->numBootDrivers + 1; + if( kSuccess == DTGetProperty(entry, "Pict-FailedBoot", (void **) &map, &size)) { + bootPict = (boot_progress_element *) map[0]; + default_noroot.width = bootPict->width; + default_noroot.height = bootPict->height; + default_noroot.dx = 0; + default_noroot.dy = bootPict->yOffset; + default_noroot_data = &bootPict->data[0]; + norootInitialized = TRUE; + } } - /* Setup powermac_info and powermac_machine_info structures */ - - ((boot_args*)PE_state.fakePPCBootArgs)->deviceTreeP = (unsigned long *) dt; - ((boot_args*)PE_state.fakePPCBootArgs)->topOfKernelData = (unsigned long) kalloc(0x2000); - - /* - * Setup the 
OpenFirmware Device Tree routines - * so the console can be found and the right I/O space - * can be used.. - */ - DTInit(dt); - - /* - * Fetch the CLUT and the noroot image. - */ + if (!bootClutInitialized) { bcopy( (void *) (uintptr_t) bootClut, (void *) appleClut8, sizeof(appleClut8) ); + } + if (!norootInitialized) { default_noroot.width = kFailedBootWidth; default_noroot.height = kFailedBootHeight; default_noroot.dx = 0; default_noroot.dy = kFailedBootOffset; default_noroot_data = failedBootPict; + } /* * Initialize the panic UI @@ -225,53 +171,50 @@ void PE_init_iokit(void) vc_progress_initialize( &default_progress, default_progress_data, (unsigned char *) appleClut8 ); - (void) StartIOKit( (void*)dt, PE_state.bootArgs, 0, 0); + (void) StartIOKit( PE_state.deviceTreeHead, PE_state.bootArgs, gPEEFIRuntimeServices, 0); } -void PE_init_platform(boolean_t vm_initialized, void * args) +void PE_init_platform(boolean_t vm_initialized, void * _args) { - if (PE_state.initialized == FALSE) - { - KernelBootArgs_t *kap = (KernelBootArgs_t *) args; + boot_args *args = (boot_args *)_args; + if (PE_state.initialized == FALSE) { PE_state.initialized = TRUE; - PE_state.bootArgs = args; - PE_state.video.v_baseAddr = kap->video.v_baseAddr; - PE_state.video.v_rowBytes = kap->video.v_rowBytes; - PE_state.video.v_height = kap->video.v_height; - PE_state.video.v_width = kap->video.v_width; - PE_state.video.v_depth = kap->video.v_depth; - PE_state.video.v_display = kap->video.v_display; - PE_fb_mode = kap->graphicsMode; - PE_state.fakePPCBootArgs = (boot_args *)&fakePPCBootArgs; - ((boot_args *)PE_state.fakePPCBootArgs)->machineType = 386; - - if (PE_fb_mode == TEXT_MODE) - { - /* Force a text display if the booter did not setup a - * VESA frame buffer. 
- */ - PE_state.video.v_display = 0; - } + + // New EFI-style + PE_state.bootArgs = _args; + PE_state.deviceTreeHead = args->deviceTreeP; + PE_state.video.v_baseAddr = args->Video.v_baseAddr; + PE_state.video.v_rowBytes = args->Video.v_rowBytes; + PE_state.video.v_width = args->Video.v_width; + PE_state.video.v_height = args->Video.v_height; + PE_state.video.v_depth = args->Video.v_depth; + PE_state.video.v_display = args->Video.v_display; + strcpy( PE_state.video.v_pixelFormat, "PPPPPPPP"); } - if (!vm_initialized) - { + if (!vm_initialized) { /* Hack! FIXME.. */ outb(0x21, 0xff); /* Maskout all interrupts Pic1 */ outb(0xa1, 0xff); /* Maskout all interrupts Pic2 */ - pe_identify_machine(args); + if (PE_state.deviceTreeHead) { + DTInit(PE_state.deviceTreeHead); } - else - { + + pe_identify_machine(args); + } else { + DTEntry entry; + void *ptr; + uint32_t size; + pe_init_debug(); } } void PE_create_console( void ) { - if ( PE_state.video.v_display ) + if ( PE_state.video.v_display == GRAPHICS_MODE ) PE_initialize_console( &PE_state.video, kPEGraphicsMode ); else PE_initialize_console( &PE_state.video, kPETextMode ); @@ -281,22 +224,6 @@ int PE_current_console( PE_Video * info ) { *info = PE_state.video; - if ( PE_fb_mode == TEXT_MODE ) - { - /* - * FIXME: Prevent the IOBootFrameBuffer from starting up - * when we are in Text mode. - */ - info->v_baseAddr = 0; - - /* - * Scale the size of the text screen from characters - * to pixels. 
- */ - info->v_width *= 8; // CHARWIDTH - info->v_height *= 16; // CHARHEIGHT - } - return (0); } diff --git a/pexpert/i386/pe_interrupt.c b/pexpert/i386/pe_interrupt.c index 322de1037..49afab3d5 100644 --- a/pexpert/i386/pe_interrupt.c +++ b/pexpert/i386/pe_interrupt.c @@ -26,7 +26,7 @@ #include -void PE_incoming_interrupt(int, void *); +void PE_incoming_interrupt(x86_saved_state_t *); struct i386_interrupt_handler { @@ -43,21 +43,43 @@ i386_interrupt_handler_t PE_interrupt_handler; void -PE_incoming_interrupt(int interrupt, void *state) +PE_incoming_interrupt(x86_saved_state_t *state) { i386_interrupt_handler_t *vector; + uint64_t rip; + int interrupt; + boolean_t user_mode = FALSE; + + if (is_saved_state64(state) == TRUE) { + x86_saved_state64_t *state64; + + state64 = saved_state64(state); + rip = state64->isf.rip; + interrupt = state64->isf.trapno; + user_mode = TRUE; + } else { + x86_saved_state32_t *state32; + + state32 = saved_state32(state); + if (state32->cs & 0x03) + user_mode = TRUE; + rip = state32->eip; + interrupt = state32->trapno; + } - KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_EXCP_INTR, 0) | DBG_FUNC_START, - 0, ((unsigned int *)state)[7], 0, 0, 0); + KERNEL_DEBUG_CONSTANT( + MACHDBG_CODE(DBG_MACH_EXCP_INTR, 0) | DBG_FUNC_START, + interrupt, (unsigned int)rip, user_mode, 0, 0); vector = &PE_interrupt_handler; if (!lapic_interrupt(interrupt, state)) { - vector->handler(vector->target, state, vector->nub, interrupt); + vector->handler(vector->target, NULL, vector->nub, interrupt); } - KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_EXCP_INTR, 0) | DBG_FUNC_END, - 0, 0, 0, 0, 0); + KERNEL_DEBUG_CONSTANT( + MACHDBG_CODE(DBG_MACH_EXCP_INTR, 0) | DBG_FUNC_END, + 0, 0, 0, 0, 0); } void PE_install_interrupt_handler(void *nub, diff --git a/pexpert/i386/pe_kprintf.c b/pexpert/i386/pe_kprintf.c index 7e0ed56f1..5501f3ddb 100644 --- a/pexpert/i386/pe_kprintf.c +++ b/pexpert/i386/pe_kprintf.c @@ -109,3 +109,10 @@ void kprintf(const char *fmt, ...) 
ml_set_interrupts_enabled(state); } } + +extern void kprintf_break_lock(void); +void +kprintf_break_lock(void) +{ + simple_lock_init(&kprintf_lock, 0); +} diff --git a/pexpert/i386/pe_serial.c b/pexpert/i386/pe_serial.c index 72ec3e136..2bce2811c 100644 --- a/pexpert/i386/pe_serial.c +++ b/pexpert/i386/pe_serial.c @@ -67,12 +67,18 @@ enum { }; enum { + UART_LSR_DR = 0x01, + UART_LSR_OE = 0x02, + UART_LSR_PE = 0x04, + UART_LSR_FE = 0x08, UART_LSR_THRE = 0x20 }; -#define UART_BAUD_RATE 115200 +static unsigned uart_baud_rate = 115200; #define UART_PORT_ADDR COM1_PORT_ADDR +#define UART_CLOCK 1843200 /* 1.8432 MHz clock */ + #define WRITE(r, v) outb(UART_PORT_ADDR + UART_##r, v) #define READ(r) inb(UART_PORT_ADDR + UART_##r) #define DELAY(x) { volatile int _d_; for (_d_ = 0; _d_ < (10000*x); _d_++) ; } @@ -96,8 +102,6 @@ uart_probe( void ) static void uart_set_baud_rate( unsigned long baud_rate ) { - #define UART_CLOCK 1843200 /* 1.8432 MHz clock */ - const unsigned char lcr = READ( LCR ); unsigned long div; @@ -120,8 +124,40 @@ uart_putc( char c ) WRITE( THR, c ); } +static int +uart_getc( void ) +{ + /* + * This function returns: + * -1 : no data + * -2 : receiver error + * >0 : character received + */ + + unsigned char lsr; + + if (!uart_initted) return -1; + + lsr = READ( LSR ); + + if ( lsr & (UART_LSR_FE | UART_LSR_PE | UART_LSR_OE) ) + { + READ( RBR ); /* discard */ + return -2; + } + + if ( lsr & UART_LSR_DR ) + { + return READ( RBR ); + } + + return -1; +} + int serial_init( void ) { + unsigned serial_baud_rate = 0; + if ( /*uart_initted ||*/ uart_probe() == 0 ) return 0; /* Disable hardware interrupts */ @@ -137,9 +173,16 @@ int serial_init( void ) WRITE( LCR, UART_LCR_8BITS ); - /* Set baud rate */ + /* Set baud rate - use the supplied boot-arg if available */ - uart_set_baud_rate( UART_BAUD_RATE ); + if (PE_parse_boot_arg("serialbaud", &serial_baud_rate)) + { + /* Valid divisor? 
*/ + if (!((UART_CLOCK / 16) % serial_baud_rate)) { + uart_baud_rate = serial_baud_rate; + } + } + uart_set_baud_rate( uart_baud_rate ); /* Assert DTR# and RTS# lines (OUT2?) */ @@ -162,5 +205,5 @@ void serial_putc( char c ) int serial_getc( void ) { - return 0; /* not supported */ + return uart_getc(); } diff --git a/pexpert/pexpert/i386/Makefile b/pexpert/pexpert/i386/Makefile index 2b6faf73e..181965c90 100644 --- a/pexpert/pexpert/i386/Makefile +++ b/pexpert/pexpert/i386/Makefile @@ -9,6 +9,7 @@ include $(MakeInc_def) DATAFILES = \ boot.h \ + efi.h \ fb_entries.h \ protos.h diff --git a/pexpert/pexpert/i386/boot.h b/pexpert/pexpert/i386/boot.h index 4a79d4b3b..22169e861 100644 --- a/pexpert/pexpert/i386/boot.h +++ b/pexpert/pexpert/i386/boot.h @@ -22,14 +22,11 @@ #ifndef _PEXPERT_I386_BOOT_H #define _PEXPERT_I386_BOOT_H -/* - * What the booter leaves behind for the kernel. - */ +#include /* - * Maximum number of boot drivers that can be loaded. + * What the booter leaves behind for the kernel. */ -#define NDRIVERS 500 /* * Types of boot driver that may be loaded by the booter. @@ -40,267 +37,112 @@ enum { kBootDriverTypeMKEXT = 2 }; -typedef struct { - unsigned long address; // address where driver was loaded - unsigned long size; // number of bytes - unsigned long type; // driver type -} driver_config_t; - -/* - * APM BIOS information. - */ -typedef struct { - unsigned short major_vers; // == 0 if not present - unsigned short minor_vers; - unsigned long cs32_base; - unsigned long cs16_base; - unsigned long ds_base; - unsigned long cs_length; - unsigned long ds_length; - unsigned long entry_offset; - union { - struct { - unsigned long mode_16 :1; - unsigned long mode_32 :1; - unsigned long idle_slows_cpu :1; - unsigned long reserved :29; - } f; - unsigned long data; - } flags; - unsigned long connected; -} APM_config_t; - -/* - * PCI bus information. 
- */ -typedef struct _PCI_bus_info_t { - union { - struct { - unsigned char configMethod1 :1; - unsigned char configMethod2 :1; - unsigned char :2; - unsigned char specialCycle1 :1; - unsigned char specialCycle2 :1; - } s; - unsigned char d; - } u_bus; - unsigned char maxBusNum; - unsigned char majorVersion; - unsigned char minorVersion; - unsigned char BIOSPresent; -} PCI_bus_info_t; - /* * Video information. */ struct boot_video { - unsigned long v_baseAddr; // Base address of video memory - unsigned long v_display; // Display Code (if Applicable - unsigned long v_rowBytes; // Number of bytes per pixel row - unsigned long v_width; // Width - unsigned long v_height; // Height - unsigned long v_depth; // Pixel Depth + uint32_t v_baseAddr; // Base address of video memory + uint32_t v_display; // Display Code + uint32_t v_rowBytes; // Number of bytes per pixel row + uint32_t v_width; // Width + uint32_t v_height; // Height + uint32_t v_depth; // Pixel Depth }; typedef struct boot_video boot_video; +/* Values for v_display */ + +#define VGA_TEXT_MODE 0 #define GRAPHICS_MODE 1 -#define TEXT_MODE 0 +#define FB_TEXT_MODE 2 -/* - * INT15, E820h - Query System Address Map. - * - * Documented in ACPI Specification Rev 2.0, - * Chapter 15 (System Address Map Interfaces). - */ - -/* - * ACPI defined memory range types. - */ enum { - kMemoryRangeUsable = 1, // RAM usable by the OS. - kMemoryRangeReserved = 2, // Reserved. (Do not use) - kMemoryRangeACPI = 3, // ACPI tables. Can be reclaimed. - kMemoryRangeNVS = 4, // ACPI NVS memory. 
(Do not use) - - /* Undefined types should be treated as kMemoryRangeReserved */ + kEfiReservedMemoryType = 0, + kEfiLoaderCode = 1, + kEfiLoaderData = 2, + kEfiBootServicesCode = 3, + kEfiBootServicesData = 4, + kEfiRuntimeServicesCode = 5, + kEfiRuntimeServicesData = 6, + kEfiConventionalMemory = 7, + kEfiUnusableMemory = 8, + kEfiACPIReclaimMemory = 9, + kEfiACPIMemoryNVS = 10, + kEfiMemoryMappedIO = 11, + kEfiMemoryMappedIOPortSpace = 12, + kEfiPalCode = 13, + kEfiMaxMemoryType = 14 }; /* * Memory range descriptor. */ -typedef struct MemoryRange { - unsigned long long base; // 64-bit base address - unsigned long long length; // 64-bit length in bytes - unsigned long type; // type of memory range - unsigned long reserved; -} MemoryRange; - -#define kMemoryMapCountMax 40 - -/* - * BIOS drive information. - */ -struct boot_drive_info { - struct drive_params { - unsigned short buf_size; - unsigned short info_flags; - unsigned long phys_cyls; - unsigned long phys_heads; - unsigned long phys_spt; - unsigned long long phys_sectors; - unsigned short phys_nbps; - unsigned short dpte_offset; - unsigned short dpte_segment; - unsigned short key; - unsigned char path_len; - unsigned char reserved1; - unsigned short reserved2; - unsigned char bus_type[4]; - unsigned char interface_type[8]; - unsigned char interface_path[8]; - unsigned char dev_path[8]; - unsigned char reserved3; - unsigned char checksum; - } params __attribute__((packed)); - struct drive_dpte { - unsigned short io_port_base; - unsigned short control_port_base; - unsigned char head_flags; - unsigned char vendor_info; - unsigned char irq : 4; - unsigned char irq_unused : 4; - unsigned char block_count; - unsigned char dma_channel : 4; - unsigned char dma_type : 4; - unsigned char pio_type : 4; - unsigned char pio_unused : 4; - unsigned short option_flags; - unsigned short reserved; - unsigned char revision; - unsigned char checksum; - } dpte __attribute__((packed)); -} __attribute__((packed)); -typedef struct 
boot_drive_info boot_drive_info_t; - -#define MAX_BIOS_DEVICES 8 - -#define OLD_BOOT_STRING_LEN 160 -#define BOOT_STRING_LEN 1024 -#define CONFIG_SIZE (12 * 4096) - -/* Old structure for compatibility */ - -typedef struct { - short version; - char bootString[OLD_BOOT_STRING_LEN]; // boot arguments - int magicCookie; // KERNBOOTMAGIC - int numIDEs; // number of IDE drives - int rootdev; // root device - int convmem; // conventional memory - int extmem; // extended memory - char bootFile[128]; // kernel file name - int firstAddr0; // first address for kern convmem - int diskInfo[4]; // info for bios dev 80-83 - int graphicsMode; // booted in graphics mode? - int kernDev; // device kernel was fetched from - int numBootDrivers; // number of drivers loaded - char * configEnd; // pointer to end of config files - int kaddr; // kernel load address - int ksize; // size of kernel - driver_config_t driverConfig[NDRIVERS]; - char _reserved[2052]; - boot_video video; - PCI_bus_info_t pciInfo; - APM_config_t apmConfig; - char config[CONFIG_SIZE]; -} KERNBOOTSTRUCT; - -#define KERNSTRUCT_ADDR ((KERNBOOTSTRUCT *) 0x11000) -#define KERNBOOTMAGIC 0xa7a7a7a7 - -#define BOOT_LINE_LENGTH 256 +typedef struct EfiMemoryRange { + uint32_t Type; + uint32_t pad; + uint64_t PhysicalStart; + uint64_t VirtualStart; + uint64_t NumberOfPages; + uint64_t Attribute; +} EfiMemoryRange; + +#define BOOT_LINE_LENGTH 1024 +#define BOOT_STRING_LEN BOOT_LINE_LENGTH /* * Video information.. 
*/ struct Boot_Video { - unsigned long v_baseAddr; /* Base address of video memory */ - unsigned long v_display; /* Display Code (if Applicable */ - unsigned long v_rowBytes; /* Number of bytes per pixel row */ - unsigned long v_width; /* Width */ - unsigned long v_height; /* Height */ - unsigned long v_depth; /* Pixel Depth */ -}; + uint32_t v_baseAddr; /* Base address of video memory */ + uint32_t v_display; /* Display Code (if Applicable) */ + uint32_t v_rowBytes; /* Number of bytes per pixel row */ + uint32_t v_width; /* Width */ + uint32_t v_height; /* Height */ + uint32_t v_depth; /* Pixel Depth */ +} __attribute__((aligned(4))); typedef struct Boot_Video Boot_Video; -/* DRAM Bank definitions - describes physical memory layout. - */ -#define kMaxDRAMBanks 26 /* maximum number of DRAM banks */ - -struct DRAMBank -{ - unsigned long base; /* physical base of DRAM bank */ - unsigned long size; /* size of bank */ -}; -typedef struct DRAMBank DRAMBank; - /* Boot argument structure - passed into Mach kernel at boot time. 
*/ +#define kBootArgsRevision 4 #define kBootArgsVersion 1 -#define kBootArgsRevision 1 + +#define kBootArgsEfiMode32 32 +#define kBootArgsEfiMode64 64 typedef struct boot_args { - unsigned short Revision; /* Revision of boot_args structure */ - unsigned short Version; /* Version of boot_args structure */ - char CommandLine[BOOT_LINE_LENGTH]; /* Passed in command line */ - DRAMBank PhysicalDRAM[kMaxDRAMBanks]; /* base and range pairs for the 26 DRAM banks */ - Boot_Video Video; /* Video Information */ - unsigned long machineType; /* Machine Type (gestalt) */ - void *deviceTreeP; /* Base of flattened device tree */ - unsigned long deviceTreeLength;/* Length of flattened tree */ - unsigned long topOfKernelData;/* Highest address used in kernel data area */ -} boot_args; + uint16_t Revision; /* Revision of boot_args structure */ + uint16_t Version; /* Version of boot_args structure */ + + char CommandLine[BOOT_LINE_LENGTH]; /* Passed in command line */ + + uint32_t MemoryMap; + uint32_t MemoryMapSize; + uint32_t MemoryMapDescriptorSize; + uint32_t MemoryMapDescriptorVersion; -extern boot_args passed_args; + Boot_Video Video; /* Video Information */ -/* New structures */ + uint32_t deviceTreeP; /* Base of flattened device tree */ + uint32_t deviceTreeLength;/* Length of flattened tree */ + uint32_t kaddr; + uint32_t ksize; -#define KERNEL_BOOT_MAGIC 0xa5b6d7e8 + uint32_t efiRuntimeServicesPageStart; + uint32_t efiRuntimeServicesPageCount; + uint32_t efiSystemTable; -typedef struct KernelBootArgs { - unsigned int magicCookie; // KERNEL_BOOT_MAGIC - unsigned short version; - unsigned short revision; - unsigned int size; // size of KernelBootArgs structure - int numDrives; // number of BIOS drives - int rootdev; // root device - int convmem; // conventional memory - int extmem; // extended memory - unsigned int firstAddr0; // first address for kern convmem - int graphicsMode; // booted in graphics mode? 
- int kernDev; // device kernel was fetched from - int numBootDrivers; // number of drivers loaded - char * configEnd; // pointer to end of config files - unsigned int kaddr; // kernel load address - unsigned int ksize; // size of kernel - char bootFile[128]; // kernel file name - char bootString[BOOT_STRING_LEN]; // boot arguments - driver_config_t driverConfig[NDRIVERS]; - unsigned long memoryMapCount; - MemoryRange memoryMap[kMemoryMapCountMax]; - boot_drive_info_t driveInfo[MAX_BIOS_DEVICES]; - boot_video video; - PCI_bus_info_t pciInfo; - APM_config_t apmConfig; - char config[CONFIG_SIZE]; -} KernelBootArgs_t; + uint8_t efiMode; /* 32 = 32-bit, 64 = 64-bit */ + uint8_t __reserved1[3]; + uint32_t __reserved2[7]; +} __attribute__((aligned(4))) boot_args; #endif /* _PEXPERT_I386_BOOT_H */ diff --git a/pexpert/pexpert/i386/efi.h b/pexpert/pexpert/i386/efi.h new file mode 100644 index 000000000..c8fc91a67 --- /dev/null +++ b/pexpert/pexpert/i386/efi.h @@ -0,0 +1,543 @@ +/* + * Copyright (c) 2005 Apple Computer, Inc. All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * The contents of this file constitute Original Code as defined in and + * are subject to the Apple Public Source License Version 2.0 (the + * "License"). You may not use this file except in compliance with the + * License. Please obtain a copy of the License at + * http://www.apple.com/publicsource and read it before using this file. + * + * This Original Code and all software distributed under the License are + * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the + * License for the specific language governing rights and limitations + * under the License. 
+ * + * @APPLE_LICENSE_HEADER_END@ + */ + +#ifndef _PEXPERT_I386_EFI_H +#define _PEXPERT_I386_EFI_H + +typedef uint8_t EFI_UINT8; +typedef uint16_t EFI_UINT16; +typedef uint32_t EFI_UINT32; +typedef uint64_t EFI_UINT64; + +typedef uint32_t EFI_UINTN; + +typedef int8_t EFI_INT8; +typedef int16_t EFI_INT16; +typedef int32_t EFI_INT32; +typedef int64_t EFI_INT64; + +typedef int8_t EFI_CHAR8; +typedef int16_t EFI_CHAR16; +typedef int32_t EFI_CHAR32; +typedef int64_t EFI_CHAR64; + +typedef uint32_t EFI_STATUS; +typedef boolean_t EFI_BOOLEAN; +typedef void VOID; +typedef VOID * EFI_HANDLE; + +typedef uint64_t EFI_PTR64; +typedef uint64_t EFI_HANDLE64; +/* + +Portions Copyright 2004, Intel Corporation +All rights reserved. This program and the accompanying materials +are licensed and made available under the terms and conditions of the BSD License +which accompanies this distribution. The full text of the license may be found at + http://opensource.org/licenses/bsd-license.php + +THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS, +WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED. + +*/ + + +// +// Modifiers for EFI Runtime and Boot Services +// +#define EFI_RUNTIMESERVICE +#define EFIAPI +#define IN +#define OUT +#define OPTIONAL + +#define EFI_MAX_BIT 0x80000000 + +// +// Set the upper bit to indicate EFI Error. 
+// +#define EFIERR(a) (EFI_MAX_BIT | (a)) + +#define EFIWARN(a) (a) +#define EFI_ERROR(a) (((INTN) (a)) < 0) + +#define EFI_SUCCESS 0 +#define EFI_LOAD_ERROR EFIERR (1) +#define EFI_INVALID_PARAMETER EFIERR (2) +#define EFI_UNSUPPORTED EFIERR (3) +#define EFI_BAD_BUFFER_SIZE EFIERR (4) +#define EFI_BUFFER_TOO_SMALL EFIERR (5) +#define EFI_NOT_READY EFIERR (6) +#define EFI_DEVICE_ERROR EFIERR (7) +#define EFI_WRITE_PROTECTED EFIERR (8) +#define EFI_OUT_OF_RESOURCES EFIERR (9) +#define EFI_VOLUME_CORRUPTED EFIERR (10) +#define EFI_VOLUME_FULL EFIERR (11) +#define EFI_NO_MEDIA EFIERR (12) +#define EFI_MEDIA_CHANGED EFIERR (13) +#define EFI_NOT_FOUND EFIERR (14) +#define EFI_ACCESS_DENIED EFIERR (15) +#define EFI_NO_RESPONSE EFIERR (16) +#define EFI_NO_MAPPING EFIERR (17) +#define EFI_TIMEOUT EFIERR (18) +#define EFI_NOT_STARTED EFIERR (19) +#define EFI_ALREADY_STARTED EFIERR (20) +#define EFI_ABORTED EFIERR (21) +#define EFI_ICMP_ERROR EFIERR (22) +#define EFI_TFTP_ERROR EFIERR (23) +#define EFI_PROTOCOL_ERROR EFIERR (24) +#define EFI_INCOMPATIBLE_VERSION EFIERR (25) +#define EFI_SECURITY_VIOLATION EFIERR (26) +#define EFI_CRC_ERROR EFIERR (27) + +#define EFI_WARN_UNKNOWN_GLYPH EFIWARN (1) +#define EFI_WARN_DELETE_FAILURE EFIWARN (2) +#define EFI_WARN_WRITE_FAILURE EFIWARN (3) +#define EFI_WARN_BUFFER_TOO_SMALL EFIWARN (4) + +// +// EFI Specification Revision information +// +#define EFI_SPECIFICATION_MAJOR_REVISION 1 +#define EFI_SPECIFICATION_MINOR_REVISION 10 + +typedef struct { + EFI_UINT32 Data1; + EFI_UINT16 Data2; + EFI_UINT16 Data3; + EFI_UINT8 Data4[8]; +} EFI_GUID; + +#define APPLE_VENDOR_GUID \ + {0xAC39C713, 0x7E50, 0x423D, {0x88, 0x9D, 0x27,0x8F, 0xCC, 0x34, 0x22, 0xB6} } + +#define EFI_GLOBAL_VARIABLE_GUID \ + {0x8BE4DF61, 0x93CA, 0x11d2, {0xAA, 0x0D, 0x00, 0xE0, 0x98, 0x03, 0x2B, 0x8C} } + +typedef union { + EFI_GUID Guid; + EFI_UINT8 Raw[16]; +} EFI_GUID_UNION; + +// +// EFI Time Abstraction: +// Year: 2000 - 20XX +// Month: 1 - 12 +// Day: 1 - 31 +// 
Hour: 0 - 23 +// Minute: 0 - 59 +// Second: 0 - 59 +// Nanosecond: 0 - 999,999,999 +// TimeZone: -1440 to 1440 or 2047 +// +typedef struct { + EFI_UINT16 Year; + EFI_UINT8 Month; + EFI_UINT8 Day; + EFI_UINT8 Hour; + EFI_UINT8 Minute; + EFI_UINT8 Second; + EFI_UINT8 Pad1; + EFI_UINT32 Nanosecond; + EFI_INT16 TimeZone; + EFI_UINT8 Daylight; + EFI_UINT8 Pad2; +} EFI_TIME; + +// +// Bit definitions for EFI_TIME.Daylight +// +#define EFI_TIME_ADJUST_DAYLIGHT 0x01 +#define EFI_TIME_IN_DAYLIGHT 0x02 + +// +// Value definition for EFI_TIME.TimeZone +// +#define EFI_UNSPECIFIED_TIMEZONE 0x07FF + +typedef enum { + EfiReservedMemoryType, + EfiLoaderCode, + EfiLoaderData, + EfiBootServicesCode, + EfiBootServicesData, + EfiRuntimeServicesCode, + EfiRuntimeServicesData, + EfiConventionalMemory, + EfiUnusableMemory, + EfiACPIReclaimMemory, + EfiACPIMemoryNVS, + EfiMemoryMappedIO, + EfiMemoryMappedIOPortSpace, + EfiPalCode, + EfiMaxMemoryType +} EFI_MEMORY_TYPE; + +typedef struct { + EFI_UINT64 Signature; + EFI_UINT32 Revision; + EFI_UINT32 HeaderSize; + EFI_UINT32 CRC32; + EFI_UINT32 Reserved; +} __attribute__((aligned(8))) EFI_TABLE_HEADER; + +// +// possible caching types for the memory range +// +#define EFI_MEMORY_UC 0x0000000000000001ULL +#define EFI_MEMORY_WC 0x0000000000000002ULL +#define EFI_MEMORY_WT 0x0000000000000004ULL +#define EFI_MEMORY_WB 0x0000000000000008ULL +#define EFI_MEMORY_UCE 0x0000000000000010ULL + +// +// physical memory protection on range +// +#define EFI_MEMORY_WP 0x0000000000001000ULL +#define EFI_MEMORY_RP 0x0000000000002000ULL +#define EFI_MEMORY_XP 0x0000000000004000ULL + +// +// range requires a runtime mapping +// +#define EFI_MEMORY_RUNTIME 0x8000000000000000ULL + +typedef EFI_UINT64 EFI_PHYSICAL_ADDRESS; +typedef EFI_UINT64 EFI_VIRTUAL_ADDRESS; + +#define EFI_MEMORY_DESCRIPTOR_VERSION 1 +typedef struct { + EFI_UINT32 Type; + EFI_UINT32 Pad; + EFI_PHYSICAL_ADDRESS PhysicalStart; + EFI_VIRTUAL_ADDRESS VirtualStart; + EFI_UINT64 NumberOfPages; + 
EFI_UINT64 Attribute; +} __attribute__((aligned(8))) EFI_MEMORY_DESCRIPTOR; + + +typedef +EFI_RUNTIMESERVICE +EFI_STATUS +(EFIAPI *EFI_SET_VIRTUAL_ADDRESS_MAP) ( + IN EFI_UINTN MemoryMapSize, + IN EFI_UINTN DescriptorSize, + IN EFI_UINT32 DescriptorVersion, + IN EFI_MEMORY_DESCRIPTOR * VirtualMap + ) __attribute__((regparm(0))); + +typedef +EFI_RUNTIMESERVICE +EFI_STATUS +(EFIAPI *EFI_CONVERT_POINTER) ( + IN EFI_UINTN DebugDisposition, + IN OUT VOID **Address + ) __attribute__((regparm(0))); + +// +// Variable attributes +// +#define EFI_VARIABLE_NON_VOLATILE 0x00000001 +#define EFI_VARIABLE_BOOTSERVICE_ACCESS 0x00000002 +#define EFI_VARIABLE_RUNTIME_ACCESS 0x00000004 +#define EFI_VARIABLE_READ_ONLY 0x00000008 + +typedef +EFI_RUNTIMESERVICE +EFI_STATUS +(EFIAPI *EFI_GET_VARIABLE) ( + IN EFI_CHAR16 * VariableName, + IN EFI_GUID * VendorGuid, + OUT EFI_UINT32 * Attributes OPTIONAL, + IN OUT EFI_UINTN * DataSize, + OUT VOID * Data + ) __attribute__((regparm(0))); + +typedef +EFI_RUNTIMESERVICE +EFI_STATUS +(EFIAPI *EFI_GET_NEXT_VARIABLE_NAME) ( + IN OUT EFI_UINTN * VariableNameSize, + IN OUT EFI_CHAR16 * VariableName, + IN OUT EFI_GUID * VendorGuid + ) __attribute__((regparm(0))); + +typedef +EFI_RUNTIMESERVICE +EFI_STATUS +(EFIAPI *EFI_SET_VARIABLE) ( + IN EFI_CHAR16 * VariableName, + IN EFI_GUID * VendorGuid, + IN EFI_UINT32 Attributes, + IN EFI_UINTN DataSize, + IN VOID * Data + ) __attribute__((regparm(0))); + +// +// EFI Time +// +typedef struct { + EFI_UINT32 Resolution; + EFI_UINT32 Accuracy; + EFI_BOOLEAN SetsToZero; +} __attribute__((aligned(4))) EFI_TIME_CAPABILITIES; + +typedef +EFI_RUNTIMESERVICE +EFI_STATUS +(EFIAPI *EFI_GET_TIME) ( + OUT EFI_TIME * Time, + OUT EFI_TIME_CAPABILITIES * Capabilities OPTIONAL + ) __attribute__((regparm(0))); + +typedef +EFI_RUNTIMESERVICE +EFI_STATUS +(EFIAPI *EFI_SET_TIME) ( + IN EFI_TIME * Time + ) __attribute__((regparm(0))); + +typedef +EFI_RUNTIMESERVICE +EFI_STATUS +(EFIAPI *EFI_GET_WAKEUP_TIME) ( + OUT EFI_BOOLEAN * 
Enabled, + OUT EFI_BOOLEAN * Pending, + OUT EFI_TIME * Time + ) __attribute__((regparm(0))); + +typedef +EFI_RUNTIMESERVICE +EFI_STATUS +(EFIAPI *EFI_SET_WAKEUP_TIME) ( + IN EFI_BOOLEAN Enable, + IN EFI_TIME * Time OPTIONAL + ) __attribute((regparm(0))); + +typedef enum { + EfiResetCold, + EfiResetWarm, + EfiResetShutdown, + +#ifdef TIANO_EXTENSION_FLAG + EfiResetUpdate +#endif + +} EFI_RESET_TYPE; + +typedef +EFI_RUNTIMESERVICE +VOID +(EFIAPI *EFI_RESET_SYSTEM) ( + IN EFI_RESET_TYPE ResetType, + IN EFI_STATUS ResetStatus, + IN EFI_UINTN DataSize, + IN EFI_CHAR16 * ResetData OPTIONAL + ) __attribute__((regparm(0))); + +typedef +EFI_RUNTIMESERVICE +EFI_STATUS +(EFIAPI *EFI_GET_NEXT_HIGH_MONO_COUNT) ( + OUT EFI_UINT32 * HighCount + ) __attribute__((regparm(0))); + +// +// Definition of Status Code extended data header +// +// HeaderSize The size of the architecture. This is specified to enable +// the future expansion +// +// Size The size of the data in bytes. This does not include the size +// of the header structure. 
+// +// Type A GUID defining the type of the data +// +// +#ifdef TIANO_EXTENSION_FLAG + +typedef +EFI_RUNTIMESERVICE +EFI_STATUS +(EFIAPI *EFI_REPORT_STATUS_CODE) ( + IN EFI_STATUS_CODE_TYPE Type, + IN EFI_STATUS_CODE_VALUE Value, + IN EFI_UINT32 Instance, + IN EFI_GUID * CallerId OPTIONAL, + IN EFI_STATUS_CODE_DATA * Data OPTIONAL + ) __attribute__((regparm(0))); + +#endif +// +// EFI Runtime Services Table +// +#define EFI_RUNTIME_SERVICES_SIGNATURE 0x56524553544e5552ULL +#define EFI_RUNTIME_SERVICES_REVISION ((EFI_SPECIFICATION_MAJOR_REVISION << 16) | (EFI_SPECIFICATION_MINOR_REVISION)) + +typedef struct { + EFI_TABLE_HEADER Hdr; + + // + // Time services + // + EFI_GET_TIME GetTime; + EFI_SET_TIME SetTime; + EFI_GET_WAKEUP_TIME GetWakeupTime; + EFI_SET_WAKEUP_TIME SetWakeupTime; + + // + // Virtual memory services + // + EFI_SET_VIRTUAL_ADDRESS_MAP SetVirtualAddressMap; + EFI_CONVERT_POINTER ConvertPointer; + + // + // Variable services + // + EFI_GET_VARIABLE GetVariable; + EFI_GET_NEXT_VARIABLE_NAME GetNextVariableName; + EFI_SET_VARIABLE SetVariable; + + // + // Misc + // + EFI_GET_NEXT_HIGH_MONO_COUNT GetNextHighMonotonicCount; + EFI_RESET_SYSTEM ResetSystem; + +#ifdef TIANO_EXTENSION_FLAG + // + // //////////////////////////////////////////////////// + // Extended EFI Services + ////////////////////////////////////////////////////// + // + EFI_REPORT_STATUS_CODE ReportStatusCode; +#endif + +} __attribute__((aligned(8))) EFI_RUNTIME_SERVICES; + +typedef struct { + EFI_TABLE_HEADER Hdr; + + // + // Time services + // + EFI_PTR64 GetTime; + EFI_PTR64 SetTime; + EFI_PTR64 GetWakeupTime; + EFI_PTR64 SetWakeupTime; + + // + // Virtual memory services + // + EFI_PTR64 SetVirtualAddressMap; + EFI_PTR64 ConvertPointer; + + // + // Variable services + // + EFI_PTR64 GetVariable; + EFI_PTR64 GetNextVariableName; + EFI_PTR64 SetVariable; + + // + // Misc + // + EFI_PTR64 GetNextHighMonotonicCount; + EFI_PTR64 ResetSystem; + +#ifdef TIANO_EXTENSION_FLAG + // + // 
//////////////////////////////////////////////////// + // Extended EFI Services + ////////////////////////////////////////////////////// + // + EFI_PTR64 ReportStatusCode; +#endif + +} __attribute__((aligned(8))) EFI_RUNTIME_SERVICES_64; + +// +// EFI Configuration Table +// +typedef struct { + EFI_GUID VendorGuid; + VOID *VendorTable; +} EFI_CONFIGURATION_TABLE; + +// +// EFI System Table +// +#define EFI_SYSTEM_TABLE_SIGNATURE 0x5453595320494249ULL +#define EFI_SYSTEM_TABLE_REVISION ((EFI_SPECIFICATION_MAJOR_REVISION << 16) | (EFI_SPECIFICATION_MINOR_REVISION)) +#define EFI_2_00_SYSTEM_TABLE_REVISION ((2 << 16) | 00) +#define EFI_1_02_SYSTEM_TABLE_REVISION ((1 << 16) | 02) +#define EFI_1_10_SYSTEM_TABLE_REVISION ((1 << 16) | 10) + +typedef struct EFI_SYSTEM_TABLE { + EFI_TABLE_HEADER Hdr; + + EFI_CHAR16 *FirmwareVendor; + EFI_UINT32 FirmwareRevision; + + EFI_HANDLE ConsoleInHandle; + VOID *ConIn; + + EFI_HANDLE ConsoleOutHandle; + VOID *ConOut; + + EFI_HANDLE StandardErrorHandle; + VOID *StdErr; + + EFI_RUNTIME_SERVICES *RuntimeServices; + VOID *BootServices; + + EFI_UINTN NumberOfTableEntries; + EFI_CONFIGURATION_TABLE *ConfigurationTable; + +} __attribute__((aligned(8))) EFI_SYSTEM_TABLE; + +typedef struct EFI_SYSTEM_TABLE_64 { + EFI_TABLE_HEADER Hdr; + + EFI_PTR64 FirmwareVendor; + EFI_UINT32 FirmwareRevision; + + EFI_UINT32 __pad; + + EFI_HANDLE64 ConsoleInHandle; + EFI_PTR64 ConIn; + + EFI_HANDLE64 ConsoleOutHandle; + EFI_PTR64 ConOut; + + EFI_HANDLE64 StandardErrorHandle; + EFI_PTR64 StdErr; + + EFI_PTR64 RuntimeServices; + EFI_PTR64 BootServices; + + EFI_UINT64 NumberOfTableEntries; + EFI_PTR64 ConfigurationTable; + +} __attribute__((aligned(8))) EFI_SYSTEM_TABLE_64; + +#endif /* _PEXPERT_I386_EFI_H */ diff --git a/pexpert/pexpert/ppc/protos.h b/pexpert/pexpert/ppc/protos.h index 0c7c88d17..e10a52852 100644 --- a/pexpert/pexpert/ppc/protos.h +++ b/pexpert/pexpert/ppc/protos.h @@ -143,4 +143,9 @@ extern void GratefulDebInit(void); extern void 
GratefulDebDisp(unsigned int coord, unsigned int data); extern void checkNMI(void); +#ifndef VM_WIMG_IO +#define VM_WIMG_IO (VM_MEM_COHERENT | \ + VM_MEM_NOT_CACHEABLE | VM_MEM_GUARDED) +#endif + #endif /* _PEXPERT_PPC_PROTOS_H_ */ diff --git a/pexpert/ppc/pe_identify_machine.c b/pexpert/ppc/pe_identify_machine.c index e0654d3c0..7b20bcdd7 100644 --- a/pexpert/ppc/pe_identify_machine.c +++ b/pexpert/ppc/pe_identify_machine.c @@ -23,6 +23,7 @@ #include #include #include +#include /* pe_identify_machine: * @@ -175,7 +176,7 @@ unsigned int PE_init_taproot(vm_offset_t *taddr) tappdata[1] = (tappdata[1] + 4095 ) & -4096; /* Make sure this is a whole page */ - *taddr = io_map_spec(tappdata[0], tappdata[1]); /* Map it in and return the address */ + *taddr = io_map_spec(tappdata[0], tappdata[1], VM_WIMG_IO); /* Map it in and return the address */ tappdata[0] = *taddr; /* Also change property */ return tappdata[1]; /* And the size */ } diff --git a/pexpert/ppc/pe_init.c b/pexpert/ppc/pe_init.c index 8c6176347..38ff042ca 100644 --- a/pexpert/ppc/pe_init.c +++ b/pexpert/ppc/pe_init.c @@ -31,6 +31,7 @@ #include #include #include +#include /* extern references */ diff --git a/pexpert/ppc/pe_kprintf.c b/pexpert/ppc/pe_kprintf.c index e8713ce46..58154df8a 100644 --- a/pexpert/ppc/pe_kprintf.c +++ b/pexpert/ppc/pe_kprintf.c @@ -31,6 +31,7 @@ #include #include #include +#include /* extern references */ extern void init_display_putc(unsigned char*, int, int); @@ -93,7 +94,7 @@ void PE_init_kprintf(boolean_t vm_initialized) if (serial_baud != -1) gPESerialBaud = serial_baud; if( (scc = PE_find_scc())) { /* See if we can find the serial port */ - scc = io_map_spec(scc, 0x1000); /* Map it in */ + scc = io_map_spec(scc, 0x1000, VM_WIMG_IO); /* Map it in */ initialize_serial((void *)scc, gPESerialBaud); /* Start up the serial driver */ PE_kputc = serial_putc;