diff -urN linux-2.6.8.1/arch/i386/Kconfig linux-2.6.8.1-rbed/arch/i386/Kconfig --- linux-2.6.8.1/arch/i386/Kconfig 2004-08-14 03:54:50.000000000 -0700 +++ linux-2.6.8.1-rbed/arch/i386/Kconfig 2005-11-07 11:12:30.000000000 -0800 @@ -539,6 +539,28 @@ depends on !SMP && X86_UP_APIC default y +config APIC_TIMER_UP + bool + depends on !SMP && X86_UP_APIC && X86_LOCAL_APIC + default y + help + This enables the apic timer patch + +config RBED + bool "RBED [Scheduler]" + depends on !SMP && !HPET_TIMER && X86_UP_APIC && APIC_TIMER_UP + default y + help + This turns on the RBED scheduler. + +config RESPONSE_TEST + bool "Responsiveness test using RTC" + default y + help + This will allow you to use the RTC clock to make some + system responsiveness measurements. You must also enable + the RTC device. + config X86_IO_APIC bool depends on !SMP && X86_UP_IOAPIC diff -urN linux-2.6.8.1/arch/i386/kernel/apic.c linux-2.6.8.1-rbed/arch/i386/kernel/apic.c --- linux-2.6.8.1/arch/i386/kernel/apic.c 2004-08-14 03:56:24.000000000 -0700 +++ linux-2.6.8.1-rbed/arch/i386/kernel/apic.c 2005-11-07 10:13:02.000000000 -0800 @@ -12,6 +12,7 @@ * Mikael Pettersson : Power Management for UP-APIC. * Pavel Machek and * Mikael Pettersson : PM converted to driver model. + * Vincent Oberle : APIC Timer module support */ #include @@ -41,6 +42,12 @@ static void apic_pm_activate(void); +#ifdef CONFIG_RBED +void __init init_oneshot (void); +void (*apic_timer_oneshot_handler)(void) = NULL; +extern void do_oneshot_irq(void); +#endif + void __init apic_intr_init(void) { #ifdef CONFIG_SMP @@ -1077,6 +1084,23 @@ */ } +#ifdef CONFIG_APIC_TIMER_UP + +/* + * Pointer to a function to be called + * at each local APIC timer interrupt (not in SMP). + */ +void (*apic_timer_up_handler)(void); + +/* + * Sets the APIC local timer UP handler (f may be NULL to clear it). + */ +void set_apic_timer_up_handler (void (*f)(void)) +{ + apic_timer_up_handler = f; +} +#endif /* CONFIG_APIC_TIMER_UP */ + /* * Local APIC timer interrupt. 
This is the most natural way for doing * local interrupts, but local timer interrupts can be emulated by @@ -1100,13 +1124,27 @@ * because timer handling can be slow. */ ack_APIC_irq(); + + irq_enter(); +//#ifdef CONFIG_RBED +#ifdef CONFIG_APIC_TIMER_UP + if (apic_timer_up_handler != NULL) { /* a registered UP handler replaces the default local timer handling */ + apic_timer_up_handler(); +/* + if (apic_timer_oneshot_handler != NULL) { + apic_timer_oneshot_handler(); +*/ + } + else + smp_local_timer_interrupt(&regs); +#else /* * update_process_times() expects us to have done irq_enter(). * Besides, if we don't timer interrupts ignore the global * interrupt lock, which is the WrongThing (tm) to do. */ - irq_enter(); smp_local_timer_interrupt(&regs); +#endif irq_exit(); } @@ -1202,5 +1240,59 @@ #endif setup_boot_APIC_clock(); +#ifdef CONFIG_RBED + init_oneshot(); +#endif return 0; } + +#ifdef CONFIG_RBED + +static long us_per_tick; /* NOTE(review): the only assignment is inside the commented-out body of init_oneshot() below, so this stays 0 */ + +void stop_oneshot_timer(void) +{ + apic_write(APIC_TMICT, 0); +} + +void start_oneshot_timer(unsigned long us) +{ + unsigned long exp= (us * us_per_tick); /* NOTE(review): evaluates to 0 while us_per_tick is 0 */ + + apic_write(APIC_TMICT, exp); +} + +#ifdef CONFIG_SIMULATE_ONESHOT +void __init init_oneshot (void) +{ + return; +} +#else +void __init init_oneshot (void) +{ +/* + unsigned int lvtt_value, ver; + + ver = GET_APIC_VERSION(apic_read(APIC_LVR)); + lvtt_value = LOCAL_TIMER_VECTOR; + + if (!APIC_INTEGRATED(ver)) + lvtt_value |= SET_APIC_TIMER_BASE(APIC_TIMER_BASE_DIV); + apic_write_around(APIC_LVTT, lvtt_value); + + // Divide configuration register + apic_write(APIC_TDCR, APIC_TDR_DIV_1); + + us_per_tick= calibration_result /(1000000/HZ); + + stop_oneshot_timer(); + + apic_timer_oneshot_handler = do_oneshot_irq; +*/ + +#ifdef CONFIG_APIC_TIMER_UP + setup_uka_apic_timer(); /* return value is ignored; NOTE(review): no prototype is visible in these apic.c hunks - confirm a header declares it */ +#endif +} +#endif //END OF SIMULATE_ONESHOT +#endif /* CONFIG_RBED */ diff -urN linux-2.6.8.1/arch/i386/kernel/irq.c linux-2.6.8.1-rbed/arch/i386/kernel/irq.c --- linux-2.6.8.1/arch/i386/kernel/irq.c 2004-08-14 03:54:48.000000000 -0700 +++ linux-2.6.8.1-rbed/arch/i386/kernel/irq.c 2005-11-07 10:13:02.000000000 
-0800 @@ -45,6 +45,12 @@ #include #include +#ifdef CONFIG_RESPONSE_TEST +#include <asm/mc146818rtc.h> +unsigned long long rtc_irq_time=0; /* sched_clock() value sampled at the first RTC interrupt seen in do_IRQ */ +unsigned int rtc_irq_time_set=0; /* 1 once rtc_irq_time has been written */ +#endif + /* * Linux has a controller-independent x86 interrupt architecture. * every controller has a 'controller-template', that is used @@ -408,6 +414,7 @@ spin_unlock_irqrestore(&desc->lock, flags); } + /* * do_IRQ handles all normal device IRQ's (the special * SMP cross-CPU interrupts have their own specific @@ -560,6 +567,13 @@ desc->status &= ~IRQ_INPROGRESS; out: +#ifdef CONFIG_RESPONSE_TEST + if(irq==RTC_IRQ && rtc_irq_time_set==0) { /* record only while the flag is 0; nothing in this patch section clears it again */ + rtc_irq_time = sched_clock(); + rtc_irq_time_set=1; + } +#endif + /* * The ->end() handler has to deal with interrupts which got * disabled while the handler was running. diff -urN linux-2.6.8.1/arch/i386/kernel/Makefile linux-2.6.8.1-rbed/arch/i386/kernel/Makefile --- linux-2.6.8.1/arch/i386/kernel/Makefile 2004-08-14 03:54:51.000000000 -0700 +++ linux-2.6.8.1-rbed/arch/i386/kernel/Makefile 2005-11-07 10:13:02.000000000 -0800 @@ -21,7 +21,8 @@ obj-$(CONFIG_X86_SMP) += smp.o smpboot.o obj-$(CONFIG_X86_TRAMPOLINE) += trampoline.o obj-$(CONFIG_X86_MPPARSE) += mpparse.o obj-$(CONFIG_X86_LOCAL_APIC) += apic.o nmi.o +obj-$(CONFIG_APIC_TIMER_UP) += uka_apic_timer.o obj-$(CONFIG_X86_IO_APIC) += io_apic.o obj-$(CONFIG_X86_NUMAQ) += numaq.o obj-$(CONFIG_X86_SUMMIT_NUMA) += summit.o diff -urN linux-2.6.8.1/arch/i386/kernel/uka_apic_timer.c linux-2.6.8.1-rbed/arch/i386/kernel/uka_apic_timer.c --- linux-2.6.8.1/arch/i386/kernel/uka_apic_timer.c 1969-12-31 16:00:00.000000000 -0800 +++ linux-2.6.8.1-rbed/arch/i386/kernel/uka_apic_timer.c 2005-11-07 10:13:02.000000000 -0800 @@ -0,0 +1,937 @@ +/* -*- linux-c -*- + * uka_apic_timer.c + * + * Module providing precise timers using the Local APIC timer. 
+ * + * Copyright (C) 2000 Vincent Oberle (vincent@oberle.com) + * Institute of Telematics, University of Karlsruhe, Germany. + * + * This file is subject to the terms and conditions of the GNU General + * Public License. See the file COPYING in the main directory + * of this archive for more details. + */ + +/* + * IMPORTANT + * --------- + * + * The kernel must be patched, see patch directory + * Have a look in the config_apic_timer.h file. + * The kernel version is indicated in the KERNEL_VERSION file. + * + * + * Race conditions / disabling interrupts + * -------------------------------------- + * + * We disable interrupts whenever the timer list is being modified. + * That is stronger than the previous implementation, which only + * protected the timer list modifications from being interrupted + * by an APIC timer interrupt. + * Basically the goal is to make sure that add_apic_timer(), + * mod_apic_timer() and del_apic_timer() are mutually exclusive. + * + * The timer functions themselves still run with interrupts enabled. + * + * Note that disabling interrupts can really impact system performance + * and a better solution may have to be found. + * + * One possible race condition that can occur is (thanks to Ryan Barnett + * for pointing it out): + * "Suppose you have two threads of execution, one done from a user + * system call, and another done from an interrupt handler. They both want to + * do "add_apic_timer()". The user system call first starts... It calls + * APIC_TIMER_LOCK then it does insert_apic_timer(timer). Then it gets to + * the middle of insert_apic_timer() where it is setting the previous timer + * to NULL and messing with the timer list. At exactly that moment, an + * interrupt occurs and the user code is halted. Then the new thread of + * execution starts and does add_apic_timer(), it then gets messed up by the + * half-modified timer list. 
Eventually the old thread will begin again and + * now it will get really screwed up, possibly dereferencing an invalid + * pointer crashing the kernel. + * The same race condition could be applied if you have two user-threads that + * are making system calls that both want to add_del_timer()." + * + * + * Cache misses influence? + * ----------------------- + * + * What is the influence of cache miss? + * If we consider that a cache miss that forces a main RAM read + * needs a lot of cycles (-> 80), they can have a lot of influence + * if we try to have a "cycle" precision. + * But this is quite difficult to solve (maybe with some intelligent + * "dummy" reads...) + */ + + +#include +#include +#include +#include +#include +/* Access to the RTC (outb_p, inb_p), for autocalibrating */ +#include +/* Warning less compiling */ +#include + +#include + +/* Access to machine-specific registers (rdmsr...) */ +#include +/* All APIC stuff */ +#include +/* For LOCAL_TIMER_VECTOR */ +#include +/* 64-bit division. */ +#include + +/* kmalloc, kfree */ +//#include +#include + +#include + +/*flag to notify the kernel the apic timer is ready to use*/ +int apic_flag = 0; + +void print_apic_timer (struct apic_timer_list *); + +/* File with defines for customizing of the module */ +//#include "config_apic_timer.h" + + +//MODULE_AUTHOR("Vincent Oberle"); +//MODULE_DESCRIPTION("Precise timers module using the local APIC timer"); + + + +/*** init code ***/ + +/* + * Adds the supplementary timer interrupt handler and + * unmasks the timer interrupt. + * Returns 0 if ok. + * Not to be used outside the module. See add_apic_timer instead. 
+ */ +int request_apic_timer_irq (void (*supp_handler)(void)) +{ + printk(KERN_INFO "Add the supp handler to the APIC timer handler\n"); + + /* Here, we enable (unmask) the local APIC timer interrupt; unlike setup_APIC_LVTT_uka(), no APIC_LVT_TIMER_PERIODIC bit is written */ + apic_write(APIC_LVTT, LOCAL_TIMER_VECTOR); + + /* Divide configuration register: APIC_TDR_DIV_1 (calibration below used APIC_TDR_DIV_16) */ + apic_write(APIC_TDCR, APIC_TDR_DIV_1); + + set_apic_timer_up_handler(supp_handler); /* hand the handler to apic.c */ + + return 0; /* this function has no failure path */ +} + + +/* + * Processor and bus frequencies. + * Needed because the APIC timer is based on the bus freq + * and we want to set it with a TSC value which is based on + * the processor freq. + * Both are written by calibrate_apic_timer() and read by start_apic_timer(). + */ +unsigned long proc_freq; +unsigned long bus_freq; + +/* + * Following functions are copied from apic.c + * Some cannot be exported since they're __init + */ +static unsigned int get_8254_timer_count_uka(void) +{ + extern spinlock_t i8253_lock; + unsigned long flags; + + unsigned int count; + + spin_lock_irqsave(&i8253_lock, flags); + + outb_p(0x00, 0x43); /* command byte 0x00 to port 0x43 before reading the count */ + count = inb_p(0x40); /* first byte read becomes the low 8 bits */ + count |= inb_p(0x40) << 8; /* second byte read becomes bits 8-15 */ + + spin_unlock_irqrestore(&i8253_lock, flags); + + return count; +} + +void wait_8254_wraparound_uka(void) +{ + unsigned int curr_count, prev_count=~0; + int delta; + + curr_count = get_8254_timer_count_uka(); + + do { + prev_count = curr_count; + curr_count = get_8254_timer_count_uka(); + delta = curr_count-prev_count; + } while (delta < 300); /* exits once two consecutive reads differ by +300 or more */ +} + +#define APIC_DIVISOR 16 /* matches APIC_TDR_DIV_16 programmed in setup_APIC_LVTT_uka() */ + +void setup_APIC_LVTT_uka(unsigned int clocks) +{ + unsigned int lvtt1_value, tmp_value; + + lvtt1_value = SET_APIC_TIMER_BASE(APIC_TIMER_BASE_DIV) | + APIC_LVT_TIMER_PERIODIC | LOCAL_TIMER_VECTOR; + apic_write_around(APIC_LVTT, lvtt1_value); + + /* Divide PICLK by 16 */ + tmp_value = apic_read(APIC_TDCR); + apic_write_around(APIC_TDCR, + (tmp_value & ~(APIC_TDR_DIV_1 | APIC_TDR_DIV_TMBASE)) + | APIC_TDR_DIV_16); + + apic_write_around(APIC_TMICT, clocks / APIC_DIVISOR); /* initial count, scaled by the divider */ +} + +/* + * Calibration code. + * This function initializes proc_freq and bus_freq. 
+ * + * Similar to the code in arch/i386/kernel/apic.c + * int __init calibrate_APIC_clock(void) + */ +void calibrate_apic_timer (void) +{ + /* For the processor clock */ + unsigned long long t1 = 0, t2 = 0; + /* For the bus clock */ + unsigned long tt1, tt2; + + int i; + const int LOOPS = HZ/10; + + printk(KERN_INFO "Calibrating APIC timer\n"); + + if (!cpu_has_tsc) { + printk(KERN_WARNING "CPU has no TSC, " + "cannot calibrate the APIC timer\n"); + return; /* proc_freq and bus_freq are left untouched */ + } + + /* + * Put whatever arbitrary (but long enough) timeout + * value into the APIC clock, we just want to get the + * counter running for calibration. + */ + setup_APIC_LVTT_uka(1000000000); + + /* Wraparound to start exact measurement */ + wait_8254_wraparound_uka(); + + rdtscll(t1); + tt1 = apic_read(APIC_TMCCT); + + /* + * Let's wait LOOPS wraparounds: + */ + for (i = 0; i < LOOPS; i++) + wait_8254_wraparound_uka(); + + tt2 = apic_read(APIC_TMCCT); + rdtscll(t2); + + proc_freq = (unsigned long)(t2 - t1); /* TSC cycles elapsed over LOOPS wraparounds */ + /* bus_freq = tt1 - tt2; */ + bus_freq = (tt1 - tt2) * APIC_DIVISOR; /* scaled back up because setup_APIC_LVTT_uka() divides the clock by 16 */ + + printk(KERN_INFO "Proc freq %lu\n", proc_freq / LOOPS); + printk(KERN_INFO "Bus freq %lu\n", bus_freq / LOOPS); +} + +/*** end of init code ***/ + + + +/* + * Error adjustment: + * tsc_error saves the value of the error for the previous timer + * issued. This value is used to correct the programming for the next timer. + */ +unsigned long tsc_error; + + + +#ifdef APIC_TIMER_TRACE_EXE + +/* + * Array to store the function called. + * If there are more traces than the size of the array, they are + * NOT recorded. 
+ */ +#define APIC_TIMER_TRACE_ARRAY_SIZE 1000 +static char *trace_array[APIC_TIMER_TRACE_ARRAY_SIZE]; +static int trace_index; + +#define BUILD_TRACE_NAME(n) static char *trace_name_##n = #n; +BUILD_TRACE_NAME(start) BUILD_TRACE_NAME(detach) +BUILD_TRACE_NAME(detach_first) BUILD_TRACE_NAME(insert) +BUILD_TRACE_NAME(run) BUILD_TRACE_NAME(add) +BUILD_TRACE_NAME(del) BUILD_TRACE_NAME(mod) +BUILD_TRACE_NAME(exe) BUILD_TRACE_NAME(do_irq) +#undef BUILD_TRACE_NAME +#define APIC_TIMER_TRACE(n) do { if (trace_index < APIC_TIMER_TRACE_ARRAY_SIZE) \ + trace_array[trace_index++] = trace_name_##n; } while (0) + +#else +#define APIC_TIMER_TRACE(n) do { } while (0) +#endif /* APIC_TIMER_TRACE_EXE */ + + + +#ifdef APIC_TIMER_ERROR_STAT + +/* + * Error statistics. + */ +unsigned long min_error, max_error; +unsigned long long total_error; +unsigned long nb_error_measure; + +#endif /* APIC_TIMER_ERROR_STAT */ + + + +/* + * Starts the APIC timer. + * The value in parameter is in "processor clocks" units. + * It is the value of the TSC when we want to execute the timer function. + * + * The function returns an integer: + * "0" means the timer should be issued immediately because it has expired, + * "1" means the APIC timer was set. + */ +inline int start_apic_timer (unsigned long long value) +{ + unsigned long eax, edx; + long long exp; /* Signed! */ + unsigned long long apic_tmict; /* For the 64-bits division */ + + APIC_TIMER_TRACE(start); + + rdtsc(eax, edx); + exp = value - TO_ULL(edx, eax); + /* printk("Val to prog in APIC before correction \t%d (TSC)\n", exp); */ + + /* Error adjustment */ + exp -= tsc_error; + + /* + * It is possible to try to program a timer in the past (exp < 0) + * if the timer expired since we called start_apic_timer. + */ + if (exp <= 0) + return 0; /* Timer expired */ + + /* + * If the timer is too far in the future, ie the value cannot + * be programmed in the 32-bit APIC timer, we program it earlier, + * knowing the timer will be automatically reprogrammed then. 
+ */ + if (exp > 0xFFFFFFFF) { + /* printk(KERN_INFO "apic: too large exp value, %ld%ld\n", + ULL_HIGH(exp), ULL_LOW(exp)); */ + exp = 0xFFFFFFFF; + } + + /* + * Conversion in "bus units" for the APIC timer. + * + * Accurate 64-bit division. Thanks to Ryan Barnett. + * do_div puts the result of the division in the first param and + * returns the remainder (a non-zero remainder rounds the count up) + */ + apic_tmict = (unsigned long long) exp * (unsigned long long)bus_freq; + if (do_div(apic_tmict, proc_freq)) /* NOTE(review): proc_freq is 0 if calibrate_apic_timer() returned early */ + apic_tmict++; + + /* + * Starts the APIC timer. + */ + apic_write(APIC_TMICT, (unsigned long)apic_tmict); + + return 1; /* APIC Timer programmed */ +} + + + +/*** timer management functions ***/ + +/* + * The APIC timer list. + * We keep the list in order (ie with the "expires" field) + * to have the fastest execution possible (ie we only have + * to look at the beginning of the list to know which timers + * to execute). + * For the store, we look first at the end, because timers + * are more likely to be added in order (ie at the end of the list). + */ +struct apic_timer_list *apic_timer_start; +struct apic_timer_list *apic_timer_end; + + +/* + * Removes the timer from the list. + * Returns 0 if timer is NULL, 1 otherwise. + * Only list management, does nothing with time or APIC stuff. + * Does not modify the timer pointed by "timer" + * (no timer->next = timer->prev = NULL;) + * This function should be protected by a lock. 
+ */ +int detach_apic_timer (struct apic_timer_list *timer) +{ + APIC_TIMER_TRACE(detach); + + if (!timer) { + printk(KERN_WARNING "timer null in detach_apic_timer\n"); + return 0; + } + + if (timer->prev) { + timer->prev->next = timer->next; + } else { + /* To check if the timer is really in the list */ + if (apic_timer_start == timer) + apic_timer_start = timer->next; + } + + if (timer->next) { + timer->next->prev = timer->prev; + } else { + if (apic_timer_end == timer) /* same check for the tail */ + apic_timer_end = timer->prev; + } + + return 1; +} + +/* + * Removes the first timer of the list. + * Faster than detach_apic_timer (even inlined). 
+ * + * Only list management, does nothing with time or APIC stuff. + * Does not modify the timer pointed by "apic_timer_start" + * This function should be protected by a lock. + */ +inline int detach_first_apic_timer (void) +{ + APIC_TIMER_TRACE(detach_first); + + if (!apic_timer_start) { + printk(KERN_WARNING "apic_timer_start null in detach_first_apic_timer\n"); + return 0; + } + + apic_timer_start = apic_timer_start->next; + if (apic_timer_start) + apic_timer_start->prev = NULL; + else + apic_timer_end = NULL; /* the list is now empty */ + + return 1; +} + +/* + * Adds a timer to the list, starting from the end + * and ordering it with the expires field. + * + * Does not access the APIC. + * This function should be protected by a lock. + */ +void insert_apic_timer (struct apic_timer_list *timer) +{ + struct apic_timer_list *t_current = apic_timer_end; + unsigned long long t_expires; + + APIC_TIMER_TRACE(insert); + + if (!timer) { + printk(KERN_WARNING "insert_apic_timer timer null\n"); + return; + } + t_expires = timer->expires; /* read only after the NULL check above */ + /* + * Walk backwards from the tail until we find a timer whose expires + * is lower than or equal to the one being inserted. 
+ */ + while ((t_current) && (t_current->expires > t_expires)) + t_current = t_current->prev; + + /* Add the timer */ + if (t_current) { + /* The timer is added after t_current */ + timer->next = t_current->next; + timer->prev = t_current; + t_current->next = timer; + if (timer->next) /* ie old t_current->next */ + timer->next->prev = timer; + else + apic_timer_end = timer; + } else { + /* The timer must be added at the beginning */ + timer->next = apic_timer_start; + timer->prev = NULL; + if (apic_timer_start) /* List not empty */ + apic_timer_start->prev = timer; + else /* List empty */ + apic_timer_end = timer; + apic_timer_start = timer; + } +} + + +#define APIC_TIMER_ERR_ADJ_NO_UPDATE 0 +#define APIC_TIMER_ERR_ADJ_UPDATE 1 + +/* + * Indicates if we are in the run_apic_timer function. + * It is used in order not to re-enter it by calling + * it other times. 
+ */ +static int in_run_apic_timer; + +/* + * Check if there are some timers to be issued. + * + * The flag parameter indicates if the error adjustment + * should be updated (APIC_TIMER_ERR_ADJ_UPDATE) + * or not (APIC_TIMER_ERR_ADJ_NO_UPDATE) + * + * The locking is managed by the function. + * + * For each timer, detaches it and calls the timer function. + */ +void run_apic_timer (int flag) +{ + unsigned long eax, edx; + unsigned long long current_tsc; + struct apic_timer_list *t_current; + unsigned long tmp_tsc_error; + unsigned long flags; + + in_run_apic_timer = 1; + + restart_run_apic_timer: + + APIC_TIMER_TRACE(run); + + /*This statement added by caixue lin on 02/10/04*/ + //flag = APIC_TIMER_ERR_ADJ_NO_UPDATE; + + if (flag == APIC_TIMER_ERR_ADJ_UPDATE) + tmp_tsc_error = tsc_error; + else + tmp_tsc_error = 0; + + rdtsc(eax, edx); + current_tsc = TO_ULL(edx, eax); + + /* + * Detach the timer and call the timer functions + * of the timers to be issued. + */ + + while ((apic_timer_start) && ((apic_timer_start->expires - tmp_tsc_error) < current_tsc)) { + if (flag == APIC_TIMER_ERR_ADJ_UPDATE) { + rdtsc(eax, edx); + current_tsc = TO_ULL(edx, eax); + + tmp_tsc_error = DIFF_ABS(current_tsc, apic_timer_start->expires); + tsc_error = DIFF_ABS(tsc_error, tmp_tsc_error); + tmp_tsc_error = tsc_error; +#ifdef APIC_TIMER_ERROR_STAT + if (tsc_error < min_error) min_error = tsc_error; + if (max_error < tsc_error) max_error = tsc_error; + total_error += tsc_error; + nb_error_measure++; +#endif /* APIC_TIMER_ERROR_STAT */ + } + + /* Detach timer - Protected by disabling interrupts. */ + local_irq_save(flags); + t_current = apic_timer_start; /* sample the head with interrupts off so it is the timer actually detached */ + if (t_current) + detach_first_apic_timer(); + /* here the list is modified, apic_timer_start points on the next timer */ + local_irq_restore(flags); + if (!t_current) /* list emptied by an interrupt after the loop test */ + break; + t_current->next = t_current->prev = NULL; + APIC_TIMER_TRACE(exe); + + /* + * Call the timer function. 
+ * It is allowed to modify the timer list here, + * so we first enable interrupts. + */ + if (t_current->function != NULL) { + /* local_irq_enable(); */ + //printk(KERN_DEBUG "***before***Process: %d with timer: %llu\n, now: %llu\n",((struct task_struct *)(t_current->data))->pid, t_current->expires, current_tsc); + t_current->function(t_current->expires, t_current->data); + //printk(KERN_DEBUG "***after***Process: %d with timer: %llu\n, now: %llu\n",((struct task_struct *)(t_current->data))->pid, t_current->expires, current_tsc); + /* local_irq_disable(); */ + } + +#ifdef APIC_TIMER_FREE_ISSUED + printk(KERN_DEBUG "free timer????\n"); + kfree(t_current); +#endif + + rdtsc(eax, edx); + current_tsc = TO_ULL(edx, eax); + + /* apic_timer_start already points on the next timer */ + } + + /* Reprogram the APIC timer; a 0 return means the head has already expired, so run the list again. */ + if ( (apic_timer_start) && + (!(start_apic_timer(apic_timer_start->expires))) ) { + flag = APIC_TIMER_ERR_ADJ_UPDATE; + goto restart_run_apic_timer; + } + + in_run_apic_timer = 0; +} + + +/* + * The function that is called by the APIC timer + * (see request_apic_timer_irq) + */ +void do_apic_irq (void) +{ + /* local_irq_disable(); */ + + APIC_TIMER_TRACE(do_irq); + + /* + * If the timer list is empty, do not do anything. + */ + if (!apic_timer_start) { + /* local_irq_enable(); */ + return; + } + + if (!in_run_apic_timer) { + run_apic_timer(APIC_TIMER_ERR_ADJ_UPDATE); + } + + /* local_irq_enable(); */ +} + + + +/* + * Adds an APIC timer. Returns 1 on success, 0 if timer is NULL or timer->prev is set. + */ +int add_apic_timer (struct apic_timer_list *timer) +{ + unsigned long flags; + + //printk("Adding apic timer\n"); + APIC_TIMER_TRACE(add); + + if (!timer) { + return 0; /* A good usage of the module shouldn't need that... */ + } + + /* Supposes a clean management of timers... 
*/ + if (timer->prev) { + //printk(KERN_WARNING "bug: kernel timer added twice at %p.\n", + printk("bug: kernel timer added twice at %p.\n", + __builtin_return_address(0)); + return 0; + } + + /* + * Insert the timer, protected by disabling interrupts. + */ + local_irq_save(flags); + insert_apic_timer(timer); + local_irq_restore(flags); + + /* + * Check if there are some timers to execute, unless we were + * already called from run_apic_timer (avoiding recursion). + */ + if (!in_run_apic_timer) { + run_apic_timer(APIC_TIMER_ERR_ADJ_NO_UPDATE); + } + + return 1; +} + + +/* + * Removes an APIC timer. + * If the next and prev pointers of the timer are not NULL, + * the timer is considered to be in the list (if it isn't, the + * list can be corrupted). + */ +int del_apic_timer (struct apic_timer_list *timer) +{ + int ret; + unsigned long flags; + + APIC_TIMER_TRACE(del); + + /* + * We do not check if we delete the first timer or not: + * if it is the first one, an interrupt will be generated, + * the list will be checked and probably no timer issued. + * The APIC timer will be reprogrammed to the first next timer. + */ + + if (!timer) { + return 0; + } + + /* Detach the timer. */ + local_irq_save(flags); + ret = detach_apic_timer(timer); + + /* + * It is a good idea (TM) to try to have next = prev = NULL + * for a timer that is not in the list. + */ + timer->next = timer->prev = NULL; + + local_irq_restore(flags); + + if (!in_run_apic_timer) { + run_apic_timer(APIC_TIMER_ERR_ADJ_NO_UPDATE); + } + + return ret; +} + + +/* + * More efficient way to update the expire field of an active timer + * (if the timer is inactive it will be activated). 
+ * mod_apic_timer(a,b) is equivalent to: + * del_apic_timer(a); a->expires = b; add_apic_timer(a) + */ +int mod_apic_timer (struct apic_timer_list *timer, unsigned long long expires) +{ + int ret; + unsigned long flags; + + APIC_TIMER_TRACE(mod); + + if (!timer) { + return 0; + } + + /* + if (timer->prev) { + printk(KERN_WARNING "bug: kernel timer added twice at %p.\n", + __builtin_return_address(0)); + return -1; + } + */ + + /* Modify the timer with interrupts disabled: new expiry, unlink, re-insert in order. */ + local_irq_save(flags); + + timer->expires = expires; + ret = detach_apic_timer(timer); /* leaves the list untouched when timer has NULL links and is neither head nor tail */ + insert_apic_timer(timer); + + local_irq_restore(flags); + + if (!in_run_apic_timer) { /* skipped when run_apic_timer is already active */ + run_apic_timer(APIC_TIMER_ERR_ADJ_NO_UPDATE); + } + + return ret; /* value returned by detach_apic_timer() */ +} + + +/* + * Export the timer list management functions. + */ + +/* +EXPORT_SYMBOL(add_apic_timer); +EXPORT_SYMBOL(del_apic_timer); +EXPORT_SYMBOL(mod_apic_timer); + +//EXPORT_SYMBOL(apic_timer_start); // Useful for debugging +*/ + +/*** end of timer management functions ***/ + + +/*** procfs code ***/ + +#ifdef CONFIG_PROC_FS + +/* + * Note that calling this function while timers have to be + * issued may impact on performance and precision of the timers. 
+ */ +static int apic_timer_proc_get_info (char *buffer, + char **start, + off_t offset, + int length) +{ + int len; + off_t pos = 0; + off_t begin = 0; + struct apic_timer_list *t_current = apic_timer_start; + unsigned long flags; +#ifdef APIC_TIMER_TRACE_EXE + int i = 0; +#endif +#ifdef APIC_TIMER_ERROR_STAT + unsigned long long av_error; +#endif + + len = sprintf(buffer, "APIC timer module for " UTS_RELEASE "\n"); + + local_irq_save(flags); + + if (!apic_timer_start) { + len += sprintf(buffer + len, "No timer left to be issued\n"); + goto lab_proc_get_info; + } + + len += sprintf(buffer + len, "Timer to be issued\n"); + while ((t_current) && (pos <= offset + length)) { + len += sprintf(buffer + len, "Timer with %lu %lu\n", + ULL_HIGH(t_current->expires), + ULL_LOW(t_current->expires)); + pos = begin + len; + if (pos < offset) { + len = 0; + begin = pos; + } + t_current = t_current->next; + } + + lab_proc_get_info: + +#ifdef APIC_TIMER_TRACE_EXE + len += sprintf(buffer + len, "Trace information\n"); + while ((i < trace_index) && (pos <= offset + length)) { + len += sprintf(buffer + len, "%s\n", trace_array[i]); + pos = begin + len; + if (pos < offset) { + len = 0; + begin = pos; + } + i++; + } +#endif + + local_irq_restore(flags); + +#ifdef APIC_TIMER_ERROR_STAT + if (nb_error_measure > 0) { + av_error = total_error; + do_div(av_error, nb_error_measure); + len += sprintf(buffer + len, "Min error: %lu\n", min_error); + len += sprintf(buffer + len, "Max error: %lu\n", max_error); + len += sprintf(buffer + len, "Average error: %lu\n", av_error); + } +#endif + + *start = buffer + (offset - begin); + len -= (offset - begin); + if (len > length) + len = length; + return len; +} + +#endif + +/*** end of procfs code ***/ + +void print_apic_timer (struct apic_timer_list *t_current) +{ + if (t_current) { + printk(KERN_DEBUG "Process: %d with timer: %llu\n",((struct task_struct *)(t_current->data))->pid, t_current->expires); + } +} + +void print_apic_timer_list (void) +{ + 
struct apic_timer_list *t_current = apic_timer_start; + printk("APIC timer list\n"); + while (t_current) { + print_apic_timer(t_current); + t_current = t_current->next; + } +} + +/* added by caixue lin on 04/14/03: one-time setup of the UKA APIC timer; returns 0 on success, -ENODEV if there is no local APIC or the timer IRQ handler cannot be registered */ +int setup_uka_apic_timer(void) +{ + printk("\n"); + printk("Init the UKA APIC timer \n"); + printk(KERN_INFO "Init the UKA APIC timer \n"); + + /* + * Checks if there is a good APIC. + * If there is no local APIC found, the processor might not + * be a P6, or the option wasn't correctly activated in the kernel. + * This check should be enough. + */ + if (!cpu_has_apic) + return -ENODEV; + + set_apic_timer_up_handler(NULL); + +#ifdef APIC_TIMER_TRACE_EXE + printk(KERN_INFO "APIC tracing activated\n"); +#endif + + /* + * We do the calibration two times since sometimes the first one gets + * a wrong bus clock value. + */ + calibrate_apic_timer(); + calibrate_apic_timer(); + + if (request_apic_timer_irq((void*)do_apic_irq) < 0) + return -ENODEV; + + tsc_error = 0; + +#ifdef APIC_TIMER_TRACE_EXE + trace_index = 0; +#endif +#ifdef APIC_TIMER_ERROR_STAT + min_error = 0xFFFFFFFF; + max_error = 0; + total_error = 0; + nb_error_measure = 0; +#endif + + apic_timer_start = apic_timer_end = NULL; + +#ifdef CONFIG_PROC_FS + if (!create_proc_info_entry("apic_timer", 0, NULL, apic_timer_proc_get_info)) + printk(KERN_ERR "uka_apic_timer: registering /proc/apic_timer failed\n"); +#endif + +#ifdef CONFIG_RBED + /* This flag is to notify the kernel whether the apic timer is + * ready to use or not. It can be set by a system-call. + */ + apic_flag = 1; +#endif + + return 0; + +} diff -urN linux-2.6.8.1/COPYING.rbed linux-2.6.8.1-rbed/COPYING.rbed --- linux-2.6.8.1/COPYING.rbed 1969-12-31 16:00:00.000000000 -0800 +++ linux-2.6.8.1-rbed/COPYING.rbed 2005-11-15 13:40:30.000000000 -0800 @@ -0,0 +1,37 @@ +License +------- + +Copyright (C) 2003-2005 Caixue Lin (lcx@cse.ucsc.edu) +Computer Science Department, University of California, Santa Cruz. 
+ + +This file is part of the RBED scheduler. + +RBED is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +RBED is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with RBED; if not, write to the Free Software +Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + + +Documentation +------------- + +Please read Documentation/rbed.txt for more information. + + +Support +------- + +If you need support for this, please contact me at the following email: + +Caixue (lcx@cse.ucsc.edu) + diff -urN linux-2.6.8.1/Documentation/rbed.txt linux-2.6.8.1-rbed/Documentation/rbed.txt --- linux-2.6.8.1/Documentation/rbed.txt 1969-12-31 16:00:00.000000000 -0800 +++ linux-2.6.8.1-rbed/Documentation/rbed.txt 2005-11-07 12:37:02.000000000 -0800 @@ -0,0 +1,14 @@ +1) Use the linux-2.6.8.1 kernel source (download it from kernel.org) + +2) To activate the rbed scheduler, do the following when making your +configuration (make config), assuming your architecture is a +modern Pentium: + +In "Processor type and features" + +say N to "HPET Timer Support" (for now) +say N to "Symmetric multi-processing support" (for now) +say Y to "Local APIC support on uniprocessors" +say Y to "RBED [Scheduler]" + +3) Then build (make bzImage) and install the kernel in the normal way. 
diff -urN linux-2.6.8.1/drivers/char/rtc.c linux-2.6.8.1-rbed/drivers/char/rtc.c --- linux-2.6.8.1/drivers/char/rtc.c 2004-08-14 03:56:26.000000000 -0700 +++ linux-2.6.8.1-rbed/drivers/char/rtc.c 2005-11-07 10:13:14.000000000 -0800 @@ -306,6 +306,10 @@ unregister_sysctl_table(sysctl_header); } +#ifdef CONFIG_RESPONSE_TEST +extern unsigned long long rtc_irq_time; +extern unsigned int rtc_irq_time_set; +#endif /* * Now all the various file operations that we export. */ @@ -354,10 +358,22 @@ schedule(); } while (1); +#ifdef CONFIG_RESPONSE_TEST + if(rtc_irq_time_set) { + + data= (long) (sched_clock() - rtc_irq_time); + + //printk("time is %llu\n", rtc_irq_time); + + rtc_irq_time_set =0; + } +#endif + if (count < sizeof(unsigned long)) retval = put_user(data, (unsigned int __user *)buf) ?: sizeof(int); else retval = put_user(data, (unsigned long __user *)buf) ?: sizeof(long); + out: current->state = TASK_RUNNING; remove_wait_queue(&rtc_wait, &wait); diff -urN linux-2.6.8.1/include/asm-i386/apic.h linux-2.6.8.1-rbed/include/asm-i386/apic.h --- linux-2.6.8.1/include/asm-i386/apic.h 2004-08-14 03:56:23.000000000 -0700 +++ linux-2.6.8.1-rbed/include/asm-i386/apic.h 2005-11-07 10:13:58.000000000 -0800 @@ -101,6 +101,10 @@ #define NMI_LOCAL_APIC 2 #define NMI_INVALID 3 +#ifdef CONFIG_APIC_TIMER_UP +extern void set_apic_timer_up_handler (void (*f)(void)); +#endif + #endif /* CONFIG_X86_LOCAL_APIC */ #endif /* __ASM_APIC_H */ diff -urN linux-2.6.8.1/include/asm-i386/param.h linux-2.6.8.1-rbed/include/asm-i386/param.h --- linux-2.6.8.1/include/asm-i386/param.h 2004-08-14 03:54:51.000000000 -0700 +++ linux-2.6.8.1-rbed/include/asm-i386/param.h 2005-11-07 10:13:58.000000000 -0800 @@ -2,7 +2,7 @@ #define _ASMi386_PARAM_H #ifdef __KERNEL__ -# define HZ 1000 /* Internal kernel timer frequency */ +# define HZ 1000 /* Internal kernel timer frequency */ # define USER_HZ 100 /* .. 
some user interfaces are in "ticks" */ # define CLOCKS_PER_SEC (USER_HZ) /* like times() */ #endif diff -urN linux-2.6.8.1/include/asm-um/param.h linux-2.6.8.1-rbed/include/asm-um/param.h --- linux-2.6.8.1/include/asm-um/param.h 2004-08-14 03:54:50.000000000 -0700 +++ linux-2.6.8.1-rbed/include/asm-um/param.h 2005-11-07 10:13:49.000000000 -0800 @@ -10,7 +10,7 @@ #define MAXHOSTNAMELEN 64 /* max length of hostname */ #ifdef __KERNEL__ -#define HZ 100 +#define HZ 1000 #define USER_HZ 100 /* .. some user interfaces are in "ticks" */ #define CLOCKS_PER_SEC (USER_HZ) /* frequency at which times() counts */ #endif diff -urN linux-2.6.8.1/include/linux/init_task.h linux-2.6.8.1-rbed/include/linux/init_task.h --- linux-2.6.8.1/include/linux/init_task.h 2004-08-14 03:54:49.000000000 -0700 +++ linux-2.6.8.1-rbed/include/linux/init_task.h 2005-11-07 10:13:52.000000000 -0800 @@ -112,6 +112,24 @@ .proc_lock = SPIN_LOCK_UNLOCKED, \ .switch_lock = SPIN_LOCK_UNLOCKED, \ .journal_info = NULL, \ + .srt_type = 0, \ + .process_type = 0, \ + .period = 0, \ + .edf_deadline = -1, \ + .actual_period = 0, \ + .previous_period = 0, \ + .exe_time = 0, \ + .release_time = 0, \ + .completion_time = 0, \ + .initial_release_time = 0, \ + .rbed_state = 0, \ + .weight = 0, \ + .apic_start_tsc = 0, \ + .apic_left_tsc = 0, \ + .apic_timer = NULL, \ + .srt_spec = NULL, \ + .server = NULL, \ + .current_server = NULL, \ } diff -urN linux-2.6.8.1/include/linux/rbed.h linux-2.6.8.1-rbed/include/linux/rbed.h --- linux-2.6.8.1/include/linux/rbed.h 1969-12-31 16:00:00.000000000 -0800 +++ linux-2.6.8.1-rbed/include/linux/rbed.h 2005-11-07 10:13:53.000000000 -0800 @@ -0,0 +1,39 @@ +#ifndef _LINUX_RBED_H +#define _LINUX_RBED_H + +#include + + +struct server_struct { + int sid; //server id = pid + int policy; //server policy = p->policy + struct task_struct *p; //point to the served task + int process_type; + int srt_type; + unsigned long long timestamp; //timestamp, may be reset to now + unsigned long long 
rtimestamp;//real timestamp + unsigned long period; + unsigned long long deadline; + unsigned long budget; + unsigned long weight; + long left_time; + long one_shot_time; //For SLAD only + int laxity_flag; //For SLAD only + int state; //ready,slack,expired or stopped (exit) + int incashqueue; //For CASH only, 0: not in cash queue; 1: in + int inqueue; + struct list_head server_list;//point to next server + int inexpiredqueue; + struct list_head expired_server_list;//point to next expired server +}; + +typedef struct server_struct server_t; + +struct slacks_struct { + unsigned long long deadline; + long left_time; + struct list_head slack_server_list;//point to next server +}; + +typedef struct slacks_struct slacks_t; +#endif diff -urN linux-2.6.8.1/include/linux/rbed_var.h linux-2.6.8.1-rbed/include/linux/rbed_var.h --- linux-2.6.8.1/include/linux/rbed_var.h 1969-12-31 16:00:00.000000000 -0800 +++ linux-2.6.8.1-rbed/include/linux/rbed_var.h 2005-11-07 10:13:53.000000000 -0800 @@ -0,0 +1,114 @@ +#ifndef _LINUX_RBED_VAR_H +#define _LINUX_RBED_VAR_H + +#include + +#define SCHED_RAND 3 +#define SCHED_SLASH 4 +#define SCHED_CBS 5 +#define SCHED_CASH 6 +#define SCHED_SLAD 7 +#define SCHED_BACKSLASH 8 +#define SCHED_EDF 9 +#define SCHED_RBED 10 + +#define BE 0 +#define SRT 1 +#define HRT 2 +#define MDSRT 0 +#define RASRT 1 +#define RBSRT 2 +#define FRT 4 + +//#define U_BETA ((unsigned long)(50000))//minimum reserved usage for BEs +//#define U_BETA ((unsigned long)(5000))//minimum reserved usage for BEs +//#define U_BETA ((unsigned long)(500))//minimum reserved usage for BEs +#define U_BETA ((unsigned long)(1000))//minimum reserved usage for BEs: 0.1% +//#define BE_DEFAULT_PERIOD ((unsigned long)(1000000)) //in ns, 1ms +//#define BE_DEFAULT_PERIOD ((unsigned long)(3000000)) //in ns, 3ms +//#define BE_DEFAULT_PERIOD ((unsigned long)(5000000)) //in ns, 5ms +#define BE_DEFAULT_PERIOD ((unsigned long)(50000000)) //in ns, 50ms + +//following are server states +#define S_RUNNING 
(0) //server is ready for normal schedule +#define S_SLACK (1) //server is used for slack schedule +#define S_EXPIRED (2) //server is expired (sleeping), and charged for next period +#define S_STOPPED (3) //server is stopped, meaning the task has already exited. + +struct qos_struct { + long benefit; //qos benefit, <=1000(normalized) + long exe_time; //worst case execution time + unsigned long period; //period, <=5s right now, otherwise overflow. +}; + +union union_srt_spec { + struct { + int mk_m; + int mk_k; + int state; + } frt; + struct { + unsigned long buffersize; + unsigned long currentlevel; + } rbsrt; + struct { + int min_dmr; + } mdsrt; + struct { + int nr_qos_level; //the number of qos levels + int qos_scaler; //qos scale factor + struct qos_struct qos[3]; //qos specifications + } rasrt; +}; + +typedef union union_srt_spec u_srt_spec; + +struct rbed_sched_param { + int sched_priority; + int srt_type; //srt process type: MDSRT,RASRT,RBSRT + int process_type; //process type: HRT,SRT,BE + long exe_time; //worst case for HRT, and ave case for SRT + unsigned long period; //period, <=5s right now, otherwise overflow. 
+ u_srt_spec srt_spec; +}; + + +//added in sched.c +extern void move_to_tail(struct task_struct *, server_t *); + +//added in rbed.c +extern void rbed_init(struct task_struct *); +extern void init_be_task(struct task_struct *, int); +extern void resort_server(server_t *); +extern void expire_server(server_t *); +extern void reset_server(server_t *); +extern void wake_up_server(server_t *); +extern void enqueue_expired_server(server_t *s); +extern void dequeue_expired_server(server_t *s); + +extern void reset_process(struct task_struct *); + +extern int apic_sleep(struct task_struct *, unsigned long long); +extern int apic_start(server_t *, unsigned long long); +extern int apic_over(server_t *); +extern void deactivate_task_be(struct task_struct *); +extern void activate_task_be(struct task_struct *); + +/* Dequeue from slack server queue */ +void dequeue_slacks(server_t *s); +/* Enqueue to slack server queue */ +void enqueue_slacks(server_t *s); +/* Merge servres whose deadline has passed current time */ +void merge_slacks(unsigned long long ); + +extern int set_rt_task(struct task_struct *, server_t *, struct rbed_sched_param); +extern void resort_task_rt(struct task_struct *p); + +extern int get_random_number(void); + +/* on machine RTUNA, an averaged context switch is 3649 cycles (which + * is 3337ns); on other machines, you may have to calibrate it */ +//#define context_switch_in_ns (long)(cycles_2_ns(3649)) +#define context_switch_in_ns (3337) //use a little bit more than 3337ns + +#endif diff -urN linux-2.6.8.1/include/linux/sched.h linux-2.6.8.1-rbed/include/linux/sched.h --- linux-2.6.8.1/include/linux/sched.h 2004-08-14 03:54:49.000000000 -0700 +++ linux-2.6.8.1-rbed/include/linux/sched.h 2005-11-07 10:13:52.000000000 -0800 @@ -100,6 +100,9 @@ #include #include #include +#include +#include +#include #include @@ -527,6 +530,68 @@ struct mempolicy *mempolicy; short il_next; /* could be shared with used_math */ #endif + + /* CONFIG_RBED */ + + // keep these 
items in even when not using CONFIG_RBED so + // that we don't have to rebuild everything when compiling for + // regular linux (sched.h requires a lot of rebuilds). + +/* + int hodgepodge_debug; + int use_slack; // bool + int need_measure_usage; // !=0 must measure usage + + unsigned long last_load; + unsigned long load_contrib; + unsigned long nominal_load; + + unsigned long measured_usage; + unsigned long curr_measured_usage; + + unsigned long edf_budget; // us + + unsigned long edf_util; + unsigned long edf_deadline; // jiffies + unsigned long edf_period; // jiffies + unsigned long edf_release; // jiffies + + unsigned long est_period; // jiffies + unsigned long last_wake_up; // jiffies + unsigned long confidence; // jiffies + + struct timer_list release_timer; +*/ + +/* rbed main process property fields*/ + int srt_type; //SRT: MD, RA, RB + int process_type; //HRT, SRT, BE + unsigned long period; + unsigned long long edf_deadline;//-1->bigger value + unsigned long actual_period; //actual period + unsigned long previous_period; //previous period + unsigned long exe_time; + unsigned long long release_time; + unsigned long long completion_time; + unsigned long long initial_release_time; + int rbed_state; + int weight; +/* rbed apic timer fields*/ + unsigned long long apic_start_tsc; + long long apic_left_tsc; + struct apic_timer_list * apic_timer; +/* rbed process qos fields*/ +/* + int nr_qos_level; //number of qos levels, <=10 + int ct_qos_level; //current qos level, [0..9] + struct qos_struct * qos;//qos specifications + long qos_scaler; //qos scale factor for controlling external errors + long prev_qos_scaler; //save previous qos scale factor +*/ + u_srt_spec *srt_spec; //union of soft real-time specition +/*servers*/ + server_t * server; + server_t * current_server; }; static inline pid_t process_group(struct task_struct *tsk) diff -urN linux-2.6.8.1/include/linux/time.h linux-2.6.8.1-rbed/include/linux/time.h --- linux-2.6.8.1/include/linux/time.h 2004-08-14 
03:55:35.000000000 -0700 +++ linux-2.6.8.1-rbed/include/linux/time.h 2005-11-07 10:13:51.000000000 -0800 @@ -41,7 +41,8 @@ * Have the 32 bit jiffies value wrap 5 minutes after boot * so jiffies wrap bugs show up earlier. */ -#define INITIAL_JIFFIES ((unsigned long)(unsigned int) (-300*HZ)) +//#define INITIAL_JIFFIES ((unsigned long)(unsigned int) (-300*HZ)) +#define INITIAL_JIFFIES ((unsigned long)(0)) /* * Change timeval to jiffies, trying to avoid the diff -urN linux-2.6.8.1/include/linux/uka_apic_timer.h linux-2.6.8.1-rbed/include/linux/uka_apic_timer.h --- linux-2.6.8.1/include/linux/uka_apic_timer.h 1969-12-31 16:00:00.000000000 -0800 +++ linux-2.6.8.1-rbed/include/linux/uka_apic_timer.h 2005-11-07 10:13:52.000000000 -0800 @@ -0,0 +1,77 @@ +/* -*- linux-c -*- + * uka_apic_timer.h + * + * Module providing precise timers using the local APIC timer. + * + * Copyright (C) 2000 Vincent Oberle (vincent@oberle.com) + * Institute of Telematics, University of Karlsruhe, Germany. + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file COPYING in the main directory of this archive + * for more details. + */ + +#ifndef _UKA_APIC_TIMER_H +#define _UKA_APIC_TIMER_H + +/*flag to notify the kernel the apic timer is ready to use*/ +extern int apic_flag; +/* + * A timer struct fot APIC timers, + * similar to the generic timer_list in timer.h + * + * "expires" is the value of the TSC register when the timer expires. + * "function" is the function to execute. + * Its parameters are 1) the expires value 2) the data value. + * "data" is a value/pointer that will be passed to the function. + */ +struct apic_timer_list { + struct apic_timer_list *next, *prev; + unsigned long long expires; + unsigned long data; + void (*function)(unsigned long long, unsigned long); +}; + + +/* Initializes a APIC timer list struct. 
*/ +static inline void init_apic_timer (struct apic_timer_list *timer) +{ + timer->next = timer->prev = NULL; +} + +/* Adds an APIC timer. */ +extern int add_apic_timer (struct apic_timer_list *timer); + +/* Removes an APIC timer. */ +extern int del_apic_timer (struct apic_timer_list *timer); + +/* + * More efficient way to update the expire field of an active timer + * (if the timer is inactive it will be activated). + * mod_timer(a,b) is equivalent to del_timer(a); a->expires = b; add_timer(a) + */ +extern int mod_apic_timer (struct apic_timer_list *timer, unsigned long long expires); + +/*added by caixue lin on 04/14/03*/ +extern int setup_uka_apic_timer (void); +extern void print_apic_timer_list (void); + +/* + * Useful macros to get the high and the low 32 bits of a 64-bits + * unsigned long long (ULL), and to convert the high + low part in the + * 64-bits ULL. Arguments and results are fully parenthesized so that + * expression arguments (e.g. ULL_LOW(a | b)) expand with the intended precedence. + */ +#define ULL_LOW(x) ((unsigned long)((x) & 0xFFFFFFFF)) +#define ULL_HIGH(x) ((unsigned long)((x) >> 32)) +#define TO_ULL(d, a) (((unsigned long long)(d) << 32) + (a)) + +/* + * Absolute value of the difference. + */ +#define DIFF_ABS(a,b) (((a) > (b)) ? 
((a) - (b)) : ((b) - (a))) + +#endif /* _UKA_APIC_TIMER_H */ + + + + diff -urN linux-2.6.8.1/init/version.c linux-2.6.8.1-rbed/init/version.c --- linux-2.6.8.1/init/version.c 2004-08-14 03:55:32.000000000 -0700 +++ linux-2.6.8.1-rbed/init/version.c 2005-11-07 11:54:16.000000000 -0800 @@ -28,6 +28,12 @@ EXPORT_SYMBOL(system_utsname); +#ifndef CONFIG_RBED const char *linux_banner = "Linux version " UTS_RELEASE " (" LINUX_COMPILE_BY "@" LINUX_COMPILE_HOST ") (" LINUX_COMPILER ") " UTS_VERSION "\n"; +#else +const char *linux_banner = + "RBED(Linux) version " UTS_RELEASE " (" LINUX_COMPILE_BY "@" + LINUX_COMPILE_HOST ") (" LINUX_COMPILER ") " UTS_VERSION "\n"; +#endif diff -urN linux-2.6.8.1/kernel/Makefile linux-2.6.8.1-rbed/kernel/Makefile --- linux-2.6.8.1/kernel/Makefile 2004-08-14 03:54:51.000000000 -0700 +++ linux-2.6.8.1-rbed/kernel/Makefile 2005-11-07 10:13:44.000000000 -0800 @@ -7,7 +7,7 @@ sysctl.o capability.o ptrace.o timer.o user.o \ signal.o sys.o kmod.o workqueue.o pid.o \ rcupdate.o intermodule.o extable.o params.o posix-timers.o \ - kthread.o + kthread.o rbed.o obj-$(CONFIG_FUTEX) += futex.o obj-$(CONFIG_GENERIC_ISA_DMA) += dma.o diff -urN linux-2.6.8.1/kernel/rbed.c linux-2.6.8.1-rbed/kernel/rbed.c --- linux-2.6.8.1/kernel/rbed.c 1969-12-31 16:00:00.000000000 -0800 +++ linux-2.6.8.1-rbed/kernel/rbed.c 2005-11-21 13:27:40.000000000 -0800 @@ -0,0 +1,1144 @@ +/* + * kernel/rbed.c + * + * RBED real-time scheduler and related functions + * + * Copyright (C) 2003-2005 Caixue Lin (lcx@cse.ucsc.edu) + * Computer Science Department, University of California, Santa Cruz. + * + * This file is subject to the terms and conditions of the GNU General + * Public License. See the file COPYING.rbed in the main directory + * of this archive for more details. 
+ * + * 2003-05-01 First version of RBED implemented in 2.4 by Caixue Lin + * 2005-10-01 Second version of RBED implemented in 2.6 by Caixue Lin + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#ifdef CONFIG_RBED +#include +#include +#include + +#include +#include +#include +#include + +LIST_HEAD(server_head); +LIST_HEAD(slacks_head); +LIST_HEAD(expired_server_head); + +static DECLARE_WAIT_QUEUE_HEAD(apic_wq_head); + +extern int scheduling_policy; +extern long slack; +extern int laxity_schedule; + +int nr_server=0; +int nr_expired_server=0; +int nr_slacks=0; +server_t bes; +server_t slacks; +unsigned long U_CPU = 1000000, U_BE = 1000000, U_RT = 0; +unsigned long total_rt_weight = 0, total_srt_weight = 0; +spinlock_t server_queue_lock; + +/*schedule overhead variables*/ +extern int print_overhead; +extern long schedule_count; +extern unsigned long long total_schedule_time; +extern long schedule_context_count; + +extern unsigned long nr_running_rt(void); +extern void nr_running_rt_inc(void); +extern void nr_running_rt_dec(void); +extern unsigned long long nr_context_switches(void); + +void enqueue_server(server_t *s); +void wrap(task_t *p, server_t *s); + + +#define SCALE_FACTOR 10 /* 2^10, carefully chosen */ +#define cpu_mhz (cpu_khz/1000) + +/* Convert CPU cycles to nanoseconds */ +unsigned long long cycles_2_ns(unsigned long long cyc) +{ + unsigned long cyc2ns_scale = (1000000 << SCALE_FACTOR)/cpu_khz; + return (cyc * cyc2ns_scale) >> SCALE_FACTOR; +} + +/* Convert microseconds to clock cycles used by the apic_timer + * functions (result is truncated, i.e. floor(us * cpu_khz / 1000)) */ +inline unsigned long us_2_cycles(unsigned long us) +{ + unsigned long cycles = (us / 1000) * cpu_khz + ((us % 1000) * cpu_khz) / 1000; /* whole-ms and sub-ms parts are scaled separately so us * cpu_khz cannot wrap a 32-bit unsigned long */ + + return cycles; +} + +/* Convert nanoseconds to clock cycles used by the apic_timer + * functions*/ +inline unsigned long long ns_2_cycles(unsigned long long ns) +{ + unsigned long ns2cyc_scale = (cpu_khz << 
SCALE_FACTOR)/1000000; + return (ns * ns2cyc_scale) >> SCALE_FACTOR; +} + +/* Generate a random integer */ +int get_random_number(){ + int random_number; + get_random_bytes(&random_number, sizeof(random_number)); + return random_number; +} + + +/*Init the BEServer*/ +void BEServer_init(task_t *p) +{ + server_t *s=&bes; + struct list_head *lh=&s->server_list; + struct list_head *leh=&s->expired_server_list; + //or U_BE = U_CPU - U_RT; + U_BE = max(U_BETA, (U_CPU - U_RT)); + + bes.sid = 0; + bes.policy = SCHED_NORMAL; + bes.timestamp = current->timestamp; + bes.period = BE_DEFAULT_PERIOD; + bes.deadline = bes.timestamp; //will be extended in wake_up_server + bes.budget = (bes.period/U_CPU) * U_BE; + bes.left_time = bes.budget; + bes.state = S_RUNNING; + lh->next = lh->prev = lh; + leh->next = leh->prev = leh; + bes.inqueue=0; + bes.inexpiredqueue=0; + bes.p = NULL; //point to the served task, idle task??? + + p->server = p->current_server = &bes; + //is current == p == (idle task) at this time??? + current->server = current->current_server = &bes; + + /* Note: bes server will be added to server list in schedule() upon first + * use of apic timer*/ +} + +/* Init the Slack Server. 
Note that slack server is never in queue */ +void SlackServer_init(void) +{ + slacks.sid = 0; + slacks.policy = SCHED_RBED; + slacks.timestamp = current->timestamp; + slacks.period = 0; + slacks.deadline = -1; + slacks.budget = 0; + slacks.left_time = 0; + slacks.state = S_RUNNING; + /* slack server should always be set to inqueue even it's not queued */ + slacks.inqueue = 1; + slacks.inexpiredqueue = 0; + slacks.p = NULL; +} + +void __init rbed_init(task_t *p) +{ + printk("RBED scheduler initialized, first pid = %d\n", p->pid); + + BEServer_init(p); + SlackServer_init(); +} + +void init_be_task(task_t *p, int time_slice) +{ + p->server = p->current_server = &bes; + p->period = 0; + p->process_type = BE; + + /* edf_deadline has type 'unsigned long long', so + * -1 makes it a big value, enough bigger than any + * RT task's deadline + */ + p->edf_deadline = -1; +} + +void dequeue_expired_server(server_t *s) +{ + if(!s->inexpiredqueue) + return; + + list_del(&s->expired_server_list); + s->inexpiredqueue=0; + nr_expired_server --; +} + + +void expired_server_list_add_sorted(unsigned long long deadline, struct list_head *new, struct list_head *list) +{ + struct list_head *i; + unsigned long long now = sched_clock(); + + list_for_each(i, list) + { + server_t *s= list_entry(i, server_t, expired_server_list); + // double check if still expired + if((long long)(s->deadline - now)<=(long long)s->period){ + i=i->prev; + dequeue_expired_server(s); + continue; + } + + if(deadline <= s->deadline - s->period) + break; + } + list_add_tail(new, i); +} + +void enqueue_expired_server(server_t *s) +{ + if(s->inexpiredqueue) + return; + + /* enqueue according to the previous deadline */ + expired_server_list_add_sorted(s->deadline - s->period, &s->expired_server_list, &expired_server_head); + s->inexpiredqueue=1; + nr_expired_server ++; +} + +/*sort the expired server list according EDF*/ +void resort_expired_server(server_t *s) +{ + dequeue_expired_server(s); + 
enqueue_expired_server(s); +} + +void server_list_add_sorted(unsigned long long deadline, struct list_head *new, struct list_head *list) +{ + struct list_head *i; + + list_for_each(i, list) + { + server_t *s= list_entry(i, server_t, server_list); + if(deadline <= s->deadline) + break; + } + list_add_tail(new, i); +} + +void dequeue_server(server_t *s) +{ + if(!s->inqueue){ + return; + } + list_del(&s->server_list); + s->inqueue=0; + nr_server --; +} + +void enqueue_server(server_t *s) +{ + if(s->inqueue){ + return; + } + server_list_add_sorted(s->deadline, &s->server_list, &server_head); + s->inqueue=1; + nr_server ++; + +} + + +/*sort the server list according EDF*/ +void resort_server(server_t *s) +{ + dequeue_server(s); + enqueue_server(s); +} + +void slack_server_list_add_sorted(unsigned long long deadline, struct list_head *new, struct list_head *list) +{ + struct list_head *i; + + list_for_each(i, list) + { + slacks_t *s= list_entry(i, slacks_t, slack_server_list); + if(deadline <= s->deadline) + break; + } + list_add_tail(new, i); +} + + +/* Dequeue from slack server queue */ +void dequeue_slacks(server_t *s) +{ + struct list_head * s_hd; + unsigned long long now = sched_clock(); + slacks_t * first_slacks; + + /* Shall we merge here? 
*/ + merge_slacks(now); + + /* Get the first slack server and dequeue it */ + s_hd = &slacks_head; + + if (s_hd->next != &slacks_head && nr_slacks > 0){ + first_slacks = list_entry(s_hd->next, slacks_t, slack_server_list); + s->deadline = first_slacks->deadline; + s->left_time = first_slacks->left_time; + + list_del(&first_slacks->slack_server_list); + kfree(first_slacks); + nr_slacks --; + } + else{ + nr_slacks = 0; + s->deadline = -1; + s->left_time = 0; + } +} + +/* Enqueue to slack server queue: a copy of s's deadline and left_time is inserted in deadline order, then passed entries are merged; if the GFP_ATOMIC allocation fails the slack is dropped */ +void enqueue_slacks(server_t *s) +{ + unsigned long long now = sched_clock(); + + slacks_t * new_slacks = kmalloc(sizeof(*new_slacks), GFP_ATOMIC); + if (!new_slacks) + return; + + new_slacks->deadline = s->deadline; + new_slacks->left_time = s->left_time; + + slack_server_list_add_sorted(new_slacks->deadline, &new_slacks->slack_server_list, &slacks_head); + nr_slacks ++; + + merge_slacks(now); +} + +/* Merge servers whose deadline has passed current time: every queued entry with deadline <= valid_deadline is freed and, if their summed left_time is positive, replaced by one entry with deadline = valid_deadline */ +void merge_slacks(unsigned long long valid_deadline) +{ + long left_time = 0; + struct list_head * s_hd = &slacks_head; + slacks_t * first_slacks; + + //we need simplify this!!! 
+ //if (!first_slacks || first_slacks->deadline > valid_deadline) + // return; + + if (nr_slacks <=0 || s_hd->next == &slacks_head) + return; + else + first_slacks = list_entry(s_hd->next, slacks_t, slack_server_list); + + while (first_slacks && first_slacks->deadline <= valid_deadline){ + left_time += first_slacks->left_time; + + list_del(&first_slacks->slack_server_list); + kfree(first_slacks); + nr_slacks --; + + /* Go to the first server again */ + s_hd = &slacks_head; + if(nr_slacks <= 0 || s_hd->next == &slacks_head) + break; + first_slacks = list_entry(s_hd->next, slacks_t, slack_server_list); + } + + //set the new merged server's deadline to NOW + if(left_time > 0){ + slacks_t * new_slacks = kmalloc(sizeof(*new_slacks), GFP_ATOMIC); + if (!new_slacks) + return; /* allocation failed: the merged slack is dropped */ + + new_slacks->deadline = valid_deadline; + new_slacks->left_time = left_time; + + slack_server_list_add_sorted(new_slacks->deadline, &new_slacks->slack_server_list, &slacks_head); /* slacks_head holds slacks_t nodes, so the slacks_t-aware helper must be used, not server_list_add_sorted */ + nr_slacks ++; + } +} + +/*Reset the scheduling parameters for a server when it expires */ +void reset_server(server_t *s) +{ + /********************************************************* + * If only one server (including sleeping) and the server is + * BEServer, then adjust its release time, deadline, etc + *********************************************************/ + unsigned long long now = sched_clock(); + + /* Careful: this lock may make apic_timer dies because of dead + * lock??? */ + //spin_lock(&server_queue_lock); + + /* set the server to 'Expired' status */ + s->state = S_EXPIRED; + s->timestamp = now; + s->rtimestamp = now; + + /* careful with deadline, is this correct? + * 1. passed the deadline: new release based on now + * 2. 
use the extended deadline + */ + if(now > s->deadline || nr_server == 1) + s->deadline = now + s->period; + else + s->deadline += s->period; + s->left_time = s->budget; + + resort_server(s); + + if(s->policy == SCHED_CASH){ + struct task_struct * served_p = find_task_by_pid(s->sid); + /* dequeue the server from cash queue */ + s->incashqueue = 0; + /* task has same deadline as server */ + if(served_p && !served_p->state){ + served_p->release_time = s->deadline - s->period; + served_p->edf_deadline = served_p->release_time + s->period; + s->p = served_p; + } + else + printk("Panic: in reset_server (%d)\n", s->sid); + } + + //spin_unlock(&server_queue_lock); +} + +/*Reset the scheduling parameters for a server when it wakes up*/ +/* For wake_up, a server was dequeued before */ +void wake_up_server(server_t *s) +{ + unsigned long long now = sched_clock(); + + //spin_lock(&server_queue_lock); + + /* set the server to 'Expired' status */ + s->state = S_EXPIRED; + s->timestamp = now; + s->rtimestamp = now; + + /* careful with deadline, is this correct? + * 1. passed the deadline: new release based on now + * 2. 
use the extended deadline + */ + if(now >= s->deadline){ + s->deadline = now + s->period; + s->left_time = s->budget; + } + else{ + unsigned long long laxity = s->deadline - now; + unsigned long long tmp1= (unsigned long long)s->period * s->left_time; /* widen before multiplying: period * left_time does not fit a 32-bit unsigned long */ + unsigned long long tmp2= laxity * s->budget; + + /* if no left budget, extend deadline*/ + if (s->left_time == 0){ + s->deadline += s->period; + s->left_time = s->budget; + } + /* if c >= U * laxity (cross-multiplied: left_time * period >= laxity * budget), generates a new deadline */ + else if(tmp1 >= tmp2){ + s->deadline = now + s->period; + s->left_time = s->budget; + } + //else + /*otherwise, uses the same budget, same deadline */ + } + + enqueue_server(s); + + if(s->policy == SCHED_BACKSLASH && s->inexpiredqueue) + resort_expired_server(s); + + if(s->policy == SCHED_CASH) + /* dequeue the server from cash queue */ + s->incashqueue = 0; + + //spin_unlock(&server_queue_lock); +} + +/*Reset the scheduling parameters for a rt task when it wakes up*/ +/* Two ways to set a release time upon deadline miss: 1. re-phase: new + * release (r) is current time, and d=r+p 2. not re-phase: r +=p and + * d=r+p. 
Currently, we are using the first one*/
+/*
+ * reset_process - re-derive a real-time task's release time and EDF deadline.
+ * @p: task being (re)activated
+ *
+ * SCHED_BACKSLASH, SCHED_CBS and SCHED_EDF tasks are re-phased: release is
+ * the current sched_clock() value and the deadline is one task period later.
+ * A SCHED_CASH task follows its server instead: while the server sits in the
+ * cash queue the next release is the server deadline, otherwise the release
+ * is one server period before the server deadline.
+ *
+ * __activate_task_rt() calls this even after reporting that the task has no
+ * server, so a SCHED_CASH task without a server is re-phased like the other
+ * classes instead of dereferencing a NULL p->server.
+ */
+void reset_process(task_t *p){
+	unsigned long long now = sched_clock();
+
+	switch (p->policy){
+	case SCHED_BACKSLASH:
+	case SCHED_CBS:
+	case SCHED_EDF:
+		p->release_time = now;
+		p->edf_deadline = p->release_time + p->period;
+		break;
+	case SCHED_CASH:
+		if(!p->server){
+			/* no server to follow: fall back to re-phasing on now */
+			p->release_time = now;
+			p->edf_deadline = p->release_time + p->period;
+		}
+		else if(p->server->incashqueue){
+			/* CASH server is in cash queue, d = d + p */
+			p->release_time = p->server->deadline;
+			p->edf_deadline = p->release_time + p->server->period;
+		}
+		else{
+			p->release_time = p->server->deadline - p->server->period;
+			p->edf_deadline = p->release_time + p->server->period;
+		}
+		break;
+	default:
+		/* other policies carry no release time or deadline to reset */
+		break;
+	}
+}
+
+/*
+ * reclaim_resource - give the rate of a departing rt server back to the bes.
+ * @s: server that is going away; only s->weight and its type are read here
+ *
+ * Removes s->weight from total_rt_weight (and from total_srt_weight for a
+ * plain SRT server, i.e. neither FRT nor RBSRT), recomputes the weight that
+ * is available to soft real-time servers and, if the system was overloaded
+ * before the removal, re-runs wrap() over all soft real-time servers.  If the
+ * system is still overloaded afterwards nothing is handed to the bes;
+ * otherwise the remainder of the function recomputes U_BE and the bes budget.
+ *
+ * The available weight is now recomputed after the removal in both cases.
+ * Previously the under-loaded path kept the value computed before the
+ * removal, so the share of an exiting HRT/FRT/RBSRT server was never added
+ * back to U_BE.
+ */
+void reclaim_resource(server_t *s){
+	//unsigned long long old_rate;
+	unsigned long total_hfrb_weight, total_aval_weight;
+	int was_overloaded;
+
+	//spin_lock(&server_queue_lock);
+	total_hfrb_weight = total_rt_weight - total_srt_weight;
+	total_aval_weight = U_CPU - U_BETA - total_hfrb_weight;
+	/* the load state is judged before the departing server is removed */
+	was_overloaded = total_aval_weight < total_srt_weight;
+
+	total_rt_weight -= s->weight;
+	if(s->process_type == SRT && s->srt_type != FRT && s->srt_type != RBSRT)
+		total_srt_weight -= s->weight;
+	/* weights as they stand without the departing server */
+	total_hfrb_weight = total_rt_weight - total_srt_weight;
+	total_aval_weight = U_CPU - U_BETA - total_hfrb_weight;
+
+	if(was_overloaded){
+		//re-allocate resource for all the soft real-time tasks
+		wrap(NULL, NULL);
+		/*overloaded -> overloaded*/
+		if(total_aval_weight < total_srt_weight)
+			return;
+		///*overloaded -> underloaded*/
+		//else
+		//bes should reclaim
+	}
+	//underloaded: no need resource re-allocation, but bes should reclaim
+
+
+/*
+	old_rate = s->budget;
+	old_rate *= U_CPU;
+	do_div(old_rate, s->period);
+*/
+
+
+	//update bes's util, period and deadline (using EDF theory???)
+	//1. If there is only the BEServer, re-release at now
+	//2. RT server exit, reclaim resources. 
+ + /**************************************************/ + /*nr_server==1 does not mean there is no RT server???*/ + /**************************************************/ + + if(nr_server == 1){ + U_BE = max(U_BETA, (U_CPU - U_RT)); + bes.deadline = sched_clock() + bes.period; + } + else{ + //U_BE += old_rate; + U_BE = total_aval_weight - total_srt_weight + U_BETA; + } + bes.budget = (bes.period/U_CPU) * U_BE; + bes.left_time = bes.budget; + + //spin_unlock(&server_queue_lock); +} + + +/* Check if a RBSRT's buffer is full. If yes, block unitl buffer + * is almost empty */ +void rbsrt(task_t *served_p, server_t *s) +{ + unsigned long long slack = s->deadline - sched_clock(); + unsigned long buffersize = served_p->srt_spec->rbsrt.buffersize; + unsigned long buffersize_in_ns = buffersize * s->period; + + //printk("RBSRT test1\n"); + // Buffer is full: block until the buffer is almost empty + if (buffersize && slack >= buffersize_in_ns){ + if(!served_p->state) + apic_sleep(served_p, buffersize_in_ns - s->period); + //printk("***************RBSRT test2****************\n"); + } +} + +/* Different actions upon a server's expiring: + * 1). recharge the server if RUNNING + * 2). suspends the server if SLACK + * 3). free the server if STOPPED + * 4). *** server should be in EXPIRED *** + */ +void expire_server(server_t *s) +{ + + task_t *p = s->p; + + /* Separate operation on bes and rt servers + * 1. BEServer is never dequeued since it also servers idle task + * 2. BEServer's parameter should be adjusted when serving idle task + */ + if(s == &bes){ + /* Separate operation on BE and RT tasks */ + if(p && p->process_type == BE){ + /* bes never expires or stops */ + if(!p->state && s->state == S_RUNNING) + /* In the following function, bes.p is re-assigned */ + move_to_tail(p, s); + } + //else if(p){//RT task, do nothing + + /* if p==idle, should we reset the release time?? 
*/
+		reset_server(s);
+	}
+	else if(s == &slacks){
+		/* Special actions for slack server */
+		if(nr_slacks > 0){
+			dequeue_slacks(s);
+		}
+		else{
+			/* no slack left: -1 stored into the deadline, budget emptied */
+			s->deadline = -1;
+			s->left_time = 0;
+		}
+		s->timestamp = sched_clock();
+	}
+	else {
+		/* rt servers. Timer expires because of
+		 * 1. task overrunning? extending deadline
+		 * 2. task completes? server suspends
+		 * 3. task exits? server diminished
+		 */
+		struct task_struct * served_p = s->sid ? find_task_by_pid(s->sid) : current;
+
+		/* Deal with RBSRT: buffer is full and need block? */
+		if(s->process_type == SRT && s->srt_type == RBSRT && served_p)
+			rbsrt(served_p, s);
+
+		/* Unless the server was stopped, pick SLACK or RUNNING from the
+		 * served task: SLACK when it is gone or not runnable. */
+		if(s->state != S_STOPPED){
+			/* Own task completes, expire it */
+			if(!served_p || served_p->state)
+				s->state = S_SLACK;
+			else
+				s->state = S_RUNNING;
+		}
+		if(s->state == S_RUNNING){
+			reset_server(s);
+
+			if(!served_p || served_p->state)
+				printk("Bug slack server -> running server\n");
+		}
+		else if (s->state == S_SLACK){
+			s->state = S_EXPIRED;
+
+			dequeue_server(s);
+
+			if(served_p && !served_p->state)
+				printk("Bug running server -> slack server\n");
+
+			/* The following assignment forces to skip the time
+			 * calculation in schedule() */
+			s->timestamp = sched_clock();
+
+		}
+		else if (s->state == S_STOPPED){
+			dequeue_server(s);
+			dequeue_expired_server(s);
+			if(p && p->current_server == s) p->current_server=NULL;
+			/* re-allocate resources: here? or wake up? */
+			reclaim_resource(s);
+			/* NOTE(review): the server is freed here and only the local
+			 * pointer is cleared; any pointer a caller or task still holds
+			 * to it (e.g. a task's ->server) dangles afterwards - confirm
+			 * that callers do not touch it again. */
+			kfree(s);
+			s = NULL;
+
+			/* output the overhead when the last real-time task exits*/
+			if(print_overhead){
+				switch (scheduling_policy){
+				case SCHED_BACKSLASH: printk("BACKSLASH "); break;
+				case SCHED_CBS: printk("CBS "); break;
+				case SCHED_EDF: printk("EDF "); break;
+				case SCHED_CASH: printk("CASH "); break;
+				}
+				/* turn the stored start value into the number of
+				 * context switches since then */
+				schedule_context_count = nr_context_switches() - schedule_context_count;
+				printk("overhead (sc, cs, sc_time, cs_time): %5lu %5ld %8llu %8llu\n",
+					schedule_count, schedule_context_count, total_schedule_time,
+					schedule_context_count*context_switch_in_ns);
+				/*reset the variables, prepare for next measurement*/
+				print_overhead = 0;
+			}
+		}
+		else
+			printk("Error: server should not be in expired state\n");
+	}
+}
+
+/* apic_brake:
+ * this function is called when an apic_timer expires,
+ * i.e. the process uses up the wcet. In this case, a
+ * wrong wcet was given by the user application.
+ *
+ * exp is not used here; data carries the server_t pointer.
+ * A server that is no longer queued is only reported.  Otherwise, unless
+ * laxity_schedule is set, the remaining budget is saved in the global
+ * slack, the budget is emptied and the server is expired.  laxity_schedule
+ * is cleared and a reschedule of current is requested in both cases.
+ */
+void apic_brake(unsigned long long exp, unsigned long data)
+{
+	server_t * s = (server_t *) data;
+
+	if(!s->inqueue){
+		printk("server aready dequeued in apic brake\n");
+		return;
+	}
+
+	if(!s->p) printk("no served task\n");
+
+
+	/* if a valid SLAD server is in laxity scheduling, just reschedule */
+	if(!laxity_schedule){
+		/* save the run time for back slack donation */
+		slack = s->left_time;
+		/* empty the server budget */
+		s->left_time = 0;
+
+		expire_server(s);
+	}
+	laxity_schedule = 0;
+
+	set_tsk_need_resched(current);
+}
+
+/* apic_start:
+ * Start the apic one-shot timer when the process is ready to
+ * execute after creation or selected to be the running process
+ */
+/* the one timer object shared by every apic_start()/apic_over() call */
+struct apic_timer_list apic_timer;
+int apic_start(server_t *s, unsigned long long now_in_tsc)
+{
+	unsigned long long when;
+
+	//set the server to 'Ready to use' status
+	//state transfer:
+	//1. normal case (here): expired->running->slack->expired
+	//2. 
recovery case (in __activate_task_rt()): slack->running->slack->expired
+	if(s->state!=S_SLACK && s->state!=S_STOPPED)
+		s->state = S_RUNNING;
+
+	when = now_in_tsc + ns_2_cycles(s->one_shot_time);
+
+	init_apic_timer(&apic_timer);
+	apic_timer.expires = when;
+	apic_timer.data = (unsigned long) s;
+	apic_timer.function = (void *) apic_brake;
+
+	if(!add_apic_timer(&apic_timer))
+		printk("apic_start: error, can't add apic timer for server\n");
+
+	return 0;
+}
+
+/*
+ * apic_over - stop the one-shot timer armed by apic_start().
+ * @s: not used; there is a single shared apic_timer
+ *
+ * Returns whatever del_apic_timer() returns.
+ */
+int apic_over(server_t *s)
+{
+	int rtn=0;
+	rtn = del_apic_timer(&apic_timer);
+	return rtn;
+}
+
+/* Timer callback installed by apic_sleep(): data is the task to wake up,
+ * exp is not used. */
+static void apic_awake(unsigned long long exp, unsigned long data)
+{
+	struct task_struct * p = (struct task_struct *) data;
+	wake_up_process(p); // this wakes up the sleeping process
+}
+
+/* apic_sleep:
+ * 1) arms a high resolution apic_timer that calls wake_up_process(p)
+ *    after ns_time nanoseconds.
+ * 2) puts the caller on the apic_wq_head wait queue and calls the
+ *    scheduler (via sleep_on) to pick a new process to run.
+ *
+ * Returns 0 after the sleep, or -1 without sleeping when the timer cannot
+ * be allocated or armed; sleeping with no timer armed would leave nothing
+ * to wake the task again.  p->apic_timer is reset to NULL whenever the
+ * timer has been freed.
+ *
+ * NOTE(review): sleep_on() blocks the calling context, so p is presumably
+ * meant to be current - confirm against the callers.  A timer that fires
+ * before sleep_on() is reached is not handled here.
+ */
+int apic_sleep( struct task_struct *p, unsigned long long ns_time)
+{
+	//unsigned long long now, when;
+	unsigned long long now_in_tsc, when;
+
+
+	/* if less than 5us, then use udelay?? */
+	if(ns_time < 5000){
+		//udelay((unsigned long)(cycles + cpu_khz/2000)/(cpu_khz/1000));//round up to 1us
+		//return 1;
+		printk("apic sleeps (%llu) less than 5us\n", ns_time);
+	}
+
+	rdtscll(now_in_tsc);
+	when = now_in_tsc + ns_2_cycles(ns_time);
+
+	p->apic_timer = (struct apic_timer_list *) kmalloc(sizeof(struct apic_timer_list), GFP_KERNEL);
+	if (p->apic_timer==0){
+		printk("Couldn't kmalloc the size for apic timer\n");
+		return -1;
+	}
+	init_apic_timer(p->apic_timer);
+	p->apic_timer->expires = when;
+	p->apic_timer->data = (unsigned long) p;
+	p->apic_timer->function = apic_awake;
+
+	if(!add_apic_timer(p->apic_timer)){
+		printk("apic_sleep: error, can't add apic timer for process: %d\n", p->pid);
+		/* no wake-up will ever come: release the timer and bail out */
+		kfree(p->apic_timer);
+		p->apic_timer = NULL;
+		return -1;
+	}
+
+	sleep_on(&apic_wq_head);
+
+	kfree(p->apic_timer);
+	/* do not leave a pointer to freed memory in the task */
+	p->apic_timer = NULL;
+	return 0;
+}
+
+
+/*************rbed_sleep()****************
+ * Get the current time using the rdtsc register, compare it to the
+ * deadline of the process. If completion time < deadline, sleep for
+ * deadline - completion_time, else don't sleep.
+ ********************************************/
+/*
+static inline int rbed_sleep(struct task_struct *p)
+{
+	unsigned long long ns_time;
+	unsigned long long completion_time;
+	//unsigned long long deadline;
+	int rtn = 0;
+
+	// get the competion time of p in nanoseconds.
+	//we do not short the deadline until the next release
+	completion_time = p->completion_time = sched_clock();
+
+	//1. Met deadline, sleep until next release
+	//2. 
Missed deadline, contiue to run
+	if(completion_time <= p->edf_deadline)
+	{
+		//p->release_time = p->edf_deadline;
+		ns_time = (p->edf_deadline - completion_time);
+		apic_sleep(p, ns_time);
+
+		rtn = 0x00;
+	}
+	else{
+		//we didn't make deadline start the next period right now
+		reset_process(p);
+
+		//re-insert the task upon deadline change
+		resort_task_rt(p);
+
+		rtn = 0x10;
+	}
+
+	return rtn;
+}
+*/
+
+
+/*
+ * init_server - initialise a freshly allocated server and bind it to a task.
+ * @p:      task to be served; its type, srt type, pid, policy and
+ *          release_time are copied into the server
+ * @s:      server to initialise
+ * @weight: rate requested for the server; added to total_rt_weight and,
+ *          for a plain SRT server (neither FRT nor RBSRT), to total_srt_weight
+ *
+ * The server starts in S_EXPIRED, off every queue, with both list nodes
+ * pointing at themselves.  incashqueue and left_time are cleared as well,
+ * because the server memory comes from kmalloc() and would otherwise hold
+ * arbitrary values in them.  deadline, period and budget are left to
+ * grap()/wrap_process().
+ */
+void init_server(task_t *p, server_t *s, unsigned long weight)
+{
+
+	s->process_type = p->process_type;
+	s->srt_type = p->srt_type;
+
+	s->weight = weight;
+	total_rt_weight += weight;
+	/* Firm real-time is scheduled as HRT */
+	if (s->process_type == SRT && s->srt_type != FRT && s->srt_type != RBSRT)
+		total_srt_weight += weight;
+
+	s->sid = p->pid;
+	s->timestamp = p->release_time;
+	s->rtimestamp = p->release_time;
+	s->policy = p->policy;
+	/* server state will be set to S_RUNNING in apic_start() */
+	s->state = S_EXPIRED;
+	/* self-linked nodes, so the list fields never hold arbitrary pointers */
+	INIT_LIST_HEAD(&s->server_list);
+	INIT_LIST_HEAD(&s->expired_server_list);
+	s->inqueue = 0;
+	s->inexpiredqueue = 0;
+	s->incashqueue = 0;
+	/* no budget until grap() or wake_up_server() fills it */
+	s->left_time = 0;
+	/* Point to the served task */
+	s->p = p;
+	/* Associate the server with the task */
+	p->server = s;
+	/* we set the current_server in re-schedule() */
+	//p->current_server = s;
+}
+
+/*
+ * grap - general rate-based resource allocation policy (underloaded system).
+ * @p: served task; supplies the period
+ * @s: server to size; s->weight is the granted rate in units of 1/U_CPU
+ *
+ * The server takes the task's period and a budget of period * weight / U_CPU,
+ * starts with a full budget, and has its deadline set to its timestamp.
+ */
+void grap(task_t *p, server_t *s)
+{
+	unsigned long long budget;
+	unsigned long long new_rate = s->weight;
+
+	/* The deadline will be extended in wake_up_server */
+	s->deadline = s->timestamp;
+
+	s->period = p->period;
+	budget = s->period;
+	budget *= new_rate;
+	do_div(budget, U_CPU);
+	s->budget = budget;
+	s->left_time = s->budget;
+}
+
+/* Currently weight = minimum rate for all MDSRT and RASRT processes. 
+ * In later work, for RASRT, weight = benefit(or error) * (minimum rate)
+ * given that their and other process' minimum rate is guaranteed.
+ */
+/*
+ * wrap_process - re-size one soft real-time server for the current load.
+ * @p: served task, or NULL to keep the server's current budget
+ * @s: MDSRT or RASRT server to adjust
+ *
+ * The server's rate is its weight while the soft real-time servers fit into
+ * the weight left over by the other rt servers, and
+ * weight * available / total_srt_weight otherwise.  The budget is taken
+ * from the task's exe_time (when @p is given) and the period is stretched to
+ * budget * U_CPU / rate.  left_time is not touched on this path.
+ *
+ * When the scaled rate rounds down to 0 the server is left unchanged and a
+ * message is logged, since the period computation would divide by zero.
+ */
+void wrap_process(task_t *p, server_t *s)
+{
+	unsigned long long budget;
+	unsigned long total_hfrb_weight, total_aval_weight;
+	unsigned long long new_rate;
+
+	total_hfrb_weight = total_rt_weight - total_srt_weight;
+	total_aval_weight = U_CPU - U_BETA - total_hfrb_weight;
+
+	/*underloaded*/
+	if(total_aval_weight >= total_srt_weight)
+		new_rate = s->weight;
+	/*overloaded*/
+	else{
+		new_rate = s->weight;
+		new_rate *= total_aval_weight;
+		do_div(new_rate, total_srt_weight);
+	}
+
+	/* both live branches below divide by new_rate */
+	if(new_rate == 0 && (s->srt_type == MDSRT || s->srt_type == RASRT)){
+		printk("wrap_process: no rate left for server %d, parameters unchanged\n", s->sid);
+		return;
+	}
+
+	switch(s->srt_type){
+	case MDSRT:
+	//case RBSRT:
+		/* Use original exe time*/
+		if(p)
+			s->budget = p->exe_time;
+		budget = s->budget;
+		budget *= U_CPU;
+		do_div(budget, new_rate);
+		/* be careful with period shortening! */
+		s->period = budget;
+		//s->left_time = s->budget;
+		break;
+	case RASRT:
+		/* period should be lengthen or shortened*/
+		if(1){ /* We need one more parameter!!! */
+			/* Use original exe time*/
+			if(p)
+				s->budget = p->exe_time;
+			budget = s->budget;
+			budget *= U_CPU;
+			do_div(budget, new_rate);
+			/* be careful with period shortening! */
+			s->period = budget;
+			//s->left_time = s->budget;
+		}
+		/* Or exeTime be shortened */
+		else{
+			if(p)
+				s->period = p->period;
+			budget = s->period;
+			budget *= new_rate;
+			do_div(budget, U_CPU);
+			s->budget = budget;
+
+			/* be careful with this! */
+			if(s->budget < s->left_time)
+				s->left_time = s->budget;
+		}
+		break;
+	default:
+		printk("Bug: impossible to be here!\n");
+	}
+}
+
+/*
+ * wrap - weighted rate-based resource allocation policy (overloaded system).
+ * @p: task of a specific server to re-size first, or NULL
+ * @s: that server, or NULL
+ *
+ * When both @p and @s are given, that server's deadline is set to its
+ * timestamp and it is re-sized with wrap_process().  Afterwards every server
+ * on server_head that is neither the bes, HRT, FRT nor RBSRT is re-sized
+ * with its served task, looked up by pid; servers whose task cannot be
+ * found are reported and skipped.
+ */
+void wrap(task_t *p, server_t *s)
+{
+	struct list_head * s_hd = &server_head;
+	struct list_head * i;
+
+	/* For the specific server do resource re-allocation*/
+	if(p&&s){
+		/* The deadline will be extended in wake_up_server */
+		s->deadline = s->timestamp;
+		wrap_process (p, s);
+	}
+
+	/* For each SRT server do resource re-allocation*/
+	list_for_each(i, s_hd)
+	{
+		task_t *served_p=NULL;
+		/* named differently from the parameter to avoid shadowing it */
+		server_t *srv= list_entry(i, server_t, server_list);
+
+		/* Only MDSRT and RASRT should be re-allocated */
+		if(srv==&bes || srv->process_type == HRT ||
+			srv->srt_type == FRT || srv->srt_type == RBSRT)
+			continue;
+
+		served_p = find_task_by_pid(srv->sid);
+		if(!served_p){
+			printk("Bug: served task(%d) has exit!\n", srv->sid);
+			continue;
+		}
+		wrap_process (served_p, srv);
+	}
+}
+
+
+/* Convert a BE process into RT process. Process period, wcet in ns,
+ * utilization in 1/1000000. 
+ */
+/*
+ * set_rt_task - admit a task as HRT/SRT and attach a newly created server.
+ * @p:  task to convert
+ * @s:  value passed in is ignored; the server is allocated here and linked
+ *      to the task through p->server by init_server()
+ * @lp: requested parameters; lp.exe_time and lp.period are in microseconds
+ *
+ * Returns 0 on success or a negative code:
+ *   -500   the task already has the requested process type
+ *   -3000  MDSRT/RASRT request while no rate is available
+ *   -4000  HRT/FRT/RBSRT request larger than the rate that can be reserved
+ *   -1     zero period, or out of memory
+ * On every failure the task is left exactly as it was: its type, timing
+ * fields and srt_spec are only written once admission and both allocations
+ * have succeeded.
+ */
+int set_rt_task(task_t *p, server_t *s, struct rbed_sched_param lp){
+	/* same types as the task fields, so the values stored later are the
+	 * ones the rate below is computed from */
+	__typeof__(p->exe_time) exe_time;
+	__typeof__(p->period) period;
+	unsigned long long new_rate;
+	unsigned long available_rate, total_aval_weight;
+	union union_srt_spec *spec;
+	int overcommitted;
+	int rtn = 0;
+
+	if(p->process_type == lp.process_type){
+		printk("You can't set a process with the same process type\n");
+		/* We should define somewhere the error code*/
+		return -500;
+	}
+
+
+	/* from us to ns */
+	exe_time = lp.exe_time * 1000;
+	period = lp.period * 1000;
+	/* the rate computation below divides by the period */
+	if(period == 0){
+		printk("set_rt_task: period must not be zero\n");
+		return -1;
+	}
+	new_rate = exe_time;
+	new_rate *= U_CPU;
+	do_div(new_rate, period);
+
+	available_rate = U_BE - U_BETA;
+	total_aval_weight = U_CPU - U_BETA -
+		(total_rt_weight - total_srt_weight);
+	/* a guaranteed-rate request is refused only when it fits neither the
+	 * rate currently held by the bes nor the total reservable weight */
+	overcommitted = new_rate > available_rate && new_rate > total_aval_weight;
+
+	/* Admission control for HRT processes */
+	if(lp.process_type == HRT){
+		/* if there is not enough resource for the HRT, it is rejected */
+		if(overcommitted)
+			return -4000;
+		printk("BE -> HRT\n");
+	}
+
+	/* srt type: MDSRT, RASRT, RBSRT, FRT */
+	if(lp.process_type == SRT)
+		printk("BE -> SRT, srt_type=%d\n", lp.srt_type);
+	spec = (union union_srt_spec *)
+		kmalloc(sizeof(union union_srt_spec), GFP_ATOMIC);
+	if(!spec){
+		printk("set_rt_task: can't allocate srt spec for process: %d\n", p->pid);
+		return -1;
+	}
+	memcpy(spec, &(lp.srt_spec), sizeof(union union_srt_spec));
+
+	/* Admission control for some classes of SRT processes */
+	switch(lp.srt_type){
+	case MDSRT:
+	case RASRT:
+		/* RASRT may have minimum rate requirement, we will implement
+		 * this later */
+		if( available_rate == 0){
+			printk("No resource available, can't convert BE to SRT\n");
+			rtn = -3000;
+			goto reject;
+		}
+		break;
+	case FRT:
+		/* Firm real-time is scheduled as HRT */
+		printk("Firm real time, m = %d, k=%d\n",
+			spec->frt.mk_m, spec->frt.mk_k);
+		if(overcommitted){
+			rtn = -4000;
+			goto reject;
+		}
+		break;
+	case RBSRT:
+		/* RBSRT has minimum rate requirement */
+		printk("RBSRT , buffer size = %lu\n", spec->rbsrt.buffersize);
+		if(overcommitted){
+			rtn = -4000;
+			goto reject;
+		}
+		break;
+	default:
+		/* no admission rule for other srt types */
+		break;
+	}
+
+	/* After RT process init, init its server*/
+	/* Now create and init a server and associate it with the task */
+	/* What's the difference between GFP_KERNEL and GFP_ATOMIC? */
+	s = (server_t *) kmalloc(sizeof(server_t), GFP_ATOMIC);
+	if(!s){
+		printk("set_rt_task: can't allocate server for process: %d\n", p->pid);
+		rtn = -1;
+		goto reject;
+	}
+
+	/* admitted: only now is the task itself modified */
+	p->exe_time = exe_time;
+	p->period = period;
+	p->previous_period = p->actual_period = p->period;
+	/* process type: HRT,SRT,BE */
+	p->process_type = lp.process_type;
+	p->srt_type = lp.srt_type;
+	p->srt_spec = spec;
+
+	p->release_time = sched_clock();
+	p->edf_deadline = p->release_time + p->period;
+
+	init_server(p, s, (unsigned long)new_rate);
+
+
+	/* Underloaded system */
+	if(new_rate <= available_rate){
+		//use general rate allocation policy all the time
+		grap(p, s);
+	}
+	else{
+		new_rate = available_rate;
+		if(s->process_type == HRT || s->srt_type == FRT || s->srt_type == RBSRT){
+			grap(p, s);
+			wrap(NULL, NULL);
+		}
+		else{
+			//only MDSRT and RASRT may enter this branch
+			//use weighted rate allocation policy all the time
+			wrap(p, s);
+			/* wrap_process() leaves left_time alone; start the new
+			 * server with a full budget, as grap() does */
+			s->left_time = s->budget;
+		}
+	}
+
+	/* casts keep the arguments in step with the %lu conversions */
+	printk("sid=%lu, p=%lu, exe=%lu, w=%lu\n", (unsigned long)s->sid,
+		(unsigned long)s->period, (unsigned long)s->left_time,
+		(unsigned long)s->weight);
+
+	/* update bes's util, period and deadline (using EDF theory???) */
+	U_BE -= new_rate;
+	bes.budget = (bes.period/U_CPU) * U_BE;
+	bes.left_time = bes.budget;
+	/* BE->RT, so the bes is considered as slack server */
+	bes.state = S_SLACK;
+
+	return rtn;
+
+reject:
+	/* the spec copy was never attached to the task */
+	kfree(spec);
+	return rtn;
+}
+
+
+
+/*
+asmlinkage int sys_rbed_deadline_met(pid_t pid)
+{
+	struct task_struct *p;
+	int retval;
+
+	p= pid ? 
find_task_by_pid(pid) : current; + + retval= rbed_sleep(p); + return retval; +} +*/ + +#endif //end of CONFIG_RBED diff -urN linux-2.6.8.1/kernel/sched.c linux-2.6.8.1-rbed/kernel/sched.c --- linux-2.6.8.1/kernel/sched.c 2004-08-14 03:55:59.000000000 -0700 +++ linux-2.6.8.1-rbed/kernel/sched.c 2005-11-07 10:13:44.000000000 -0800 @@ -44,6 +44,29 @@ #include +#ifdef CONFIG_RBED +#include +#include +#define APIC_DELAY (3) +#define APIC_UP(apic_flag) (apic_flag >= APIC_DELAY) +extern server_t bes; +extern int nr_server; +extern struct list_head server_head; +extern int nr_expired_server; +extern struct list_head expired_server_head; +int laxity_schedule=0; +int scheduling_policy = 0; +long slack=0; +int normal_schedule=0; +server_t * back_donate_server = NULL; + +/*schedule overhead variables*/ +int print_overhead=0; +long schedule_count=0;; +unsigned long long total_schedule_time=0; +long schedule_context_count=0; +#endif + #ifdef CONFIG_NUMA #define cpu_to_node_mask(cpu) node_to_cpumask(cpu_to_node(cpu)) #else @@ -157,8 +180,19 @@ #define LOW_CREDIT(p) \ ((p)->interactive_credit < -CREDIT_LIMIT) +#ifndef CONFIG_RBED //normal linux #define TASK_PREEMPTS_CURR(p, rq) \ ((p)->prio < (rq)->curr->prio) +#else // RBED +//#define TASK_PREEMPTS_CURR(p, rq) (1) +#define TASK_PREEMPTS_CURR(p, rq) \ + ( ((rq)->curr == (rq)->idle) || !(rq)->curr->current_server || (rq)->curr->state || \ + ((p)->process_type != (BE) && (p)->server == (rq)->curr->current_server) || \ + ((rq)->curr->current_server->state == S_SLACK && (p)->edf_deadline < (rq)->curr->edf_deadline) || \ + ((p)->server->deadline < (rq)->curr->current_server->deadline) || \ + ((p)->server->deadline == (rq)->curr->current_server->deadline && (p)->edf_deadline < (rq)->curr->edf_deadline) ) +#endif +//#define TASK_PREEMPTS_CURR(p, rq) ((rq)->curr == (rq)->idle) /* * BASE_TIMESLICE scales user-nice values [ -20 ... 
19 ] @@ -190,11 +224,17 @@ typedef struct runqueue runqueue_t; +#ifndef CONFIG_RBED struct prio_array { unsigned int nr_active; unsigned long bitmap[BITMAP_SIZE]; struct list_head queue[MAX_PRIO]; }; +#else // RBED +struct prio_array { + struct list_head queue[1]; +}; +#endif /* * This is the main, per-CPU runqueue data structure. @@ -211,6 +251,9 @@ * remote CPUs use both these fields when doing load calculation. */ unsigned long nr_running; +#ifdef CONFIG_RBED + unsigned long nr_running_rt; +#endif #ifdef CONFIG_SMP unsigned long cpu_load; #endif @@ -254,6 +297,19 @@ # define task_running(rq, p) ((rq)->curr == (p)) #endif + +#ifdef CONFIG_RBED + +#ifdef rt_task +#undef rt_task +#define rt_task(p) ((p)->policy == SCHED_BACKSLASH || \ + (p)->policy == SCHED_CBS || \ + (p)->policy == SCHED_EDF || \ + (p)->policy == SCHED_CASH) +#endif + +#endif + /* * task_rq_lock - lock the runqueue a given task resides on and disable * interrupts. Note the ordering: we can safely lookup the task_rq without @@ -301,6 +357,7 @@ /* * Adding/removing a task to/from a priority array: */ +#ifndef CONFIG_RBED // normal linux static void dequeue_task(struct task_struct *p, prio_array_t *array) { array->nr_active--; @@ -330,6 +387,166 @@ p->array = array; } +#else // RBED +/* Task and server operation functions are defined here */ +static inline void list_add_sorted(unsigned long long edf_deadline, struct list_head *new, struct list_head *list) +{ + struct list_head *i; + + list_for_each(i, list) + { + task_t *t= list_entry(i, task_t, run_list); + if(edf_deadline <= t->edf_deadline) + break; + } + + list_add_tail(new, i); +} + +static void dequeue_task(struct task_struct *p, prio_array_t *array) +{ + list_del(&p->run_list); +} + +static void enqueue_task(struct task_struct *p, prio_array_t *array) +{ + list_add_sorted(p->edf_deadline, &p->run_list, array->queue); + p->array = array; +} + +static inline void enqueue_task_head(struct task_struct *p, prio_array_t *array) +{ + 
list_add(&p->run_list, array->queue); + p->array = array; +} + +/* Real time tasks are unordered in the run queue */ +static void enqueue_task_rt(struct task_struct *p, prio_array_t *array) +{ + enqueue_task_head(p, array); +} + +void move_to_tail(struct task_struct *p, server_t *s) +{ + runqueue_t *rq = this_rq(); + + if(!p || p==rq->idle){ + s->p = rq->idle; + return; + } + + spin_lock_irq(&rq->lock); + // move it to the end of the queue + list_move_tail(&p->run_list, rq->active->queue); + spin_unlock_irq(&rq->lock); + + s->p = NULL; +} + +void resort_task_rt(struct task_struct *p) +{ + runqueue_t *rq = this_rq(); + dequeue_task(p, rq->active); + enqueue_task_rt(p, rq->active); +} + +/* + * __activate_task_rt - move a rt task to the runqueue (before BE tasks). + * wakes up its server if it's sleeping + */ +static inline void __activate_task_rt(task_t *p, runqueue_t *rq) +{ + server_t *s = p->server; + + if(s){ + /* force a EDF server to release at current time */ + if(s->policy == SCHED_EDF) + s->deadline = 0; + + if(!s->inqueue){ + wake_up_server(s); + } + else{ + if(s->left_time <= 0 || s->policy == SCHED_EDF){ + reset_server(s); + } + } + /* states: ready, slack, expired (sleeping), stopped (exit) */ + if(likely(s->state == S_EXPIRED)){ + /* wake_up_server recharges the server and enqueue it */ + /* What if the server is already waken up by its timer??? 
*/ + /* For now a server can be only waken by its task, but not its timer */ + s->p=p; + p->current_server = s; + } + else if(unlikely(s->state == S_SLACK)){ + s->state = S_RUNNING; + s->p=p; + p->current_server = s; + } + /* overrun, but completed using beserver*/ + else if(unlikely(s->state == S_RUNNING)){ + s->p=p; + p->current_server = s; + } + } + else + printk("Error: no assocaited server for rt task:%d\n",p->pid); + + reset_process(p); + enqueue_task_rt(p, rq->active); + + /* we should be careful with following when we call this + * function from setscheduler() */ + rq->nr_running++; + rq->nr_running_rt++; +} + +unsigned long nr_running_rt(void) +{ +/* + unsigned long i, sum = 0; + + for_each_cpu(i) + sum += cpu_rq(i)->nr_running_rt; + + return sum; +*/ + runqueue_t * rq = this_rq(); + return rq->nr_running_rt; +} +void nr_running_rt_inc(void) +{ + runqueue_t * rq = this_rq(); + rq->nr_running_rt++; +} + +void nr_running_rt_dec(void) +{ + runqueue_t * rq = this_rq(); + rq->nr_running_rt--; +} + +/* Get the first non-slack server. + * This is only for SCHED_BACKSLASH*/ +server_t * get_normal_server(void){ + server_t * normal_server=NULL; + struct list_head * s_hd = &server_head; + struct list_head * i; + + list_for_each(i, s_hd) + { + server_t *s= list_entry(i, server_t, server_list); + task_t * served_p = find_task_by_pid(s->sid); + if(s!=&bes && served_p && !served_p->state){ + normal_server = s; + break; + } + } + return normal_server; +} +#endif + /* * effective_prio - return the priority that is based on the static * priority but is modified by bonuses/penalties. @@ -459,6 +676,7 @@ * Update all the scheduling statistics stuff. (sleep average * calculation, priority modifiers, etc.) 
*/ +#ifndef CONFIG_RBED // normal linux static void activate_task(task_t *p, runqueue_t *rq, int local) { unsigned long long now; @@ -501,10 +719,21 @@ __activate_task(p, rq); } +#else // RBED +static void activate_task(task_t *p, runqueue_t *rq, int local) +{ + local=local; + if(p->process_type == BE) + __activate_task(p, rq); + else + __activate_task_rt(p, rq); +} +#endif /* * deactivate_task - remove a task from the runqueue. */ +#ifndef CONFIG_RBED // normal linux static void deactivate_task(struct task_struct *p, runqueue_t *rq) { rq->nr_running--; @@ -513,6 +742,23 @@ dequeue_task(p, p->array); p->array = NULL; } +#else // RBED +static void deactivate_task(struct task_struct *p, runqueue_t *rq) +{ + rq->nr_running--; + if(p->process_type!=BE) + rq->nr_running_rt--; + + if (p->state == TASK_UNINTERRUPTIBLE) { + rq->nr_uninterruptible++; + } + else if (p->state == TASK_INTERRUPTIBLE) { + } + + dequeue_task(p, p->array); + p->array = NULL; +} +#endif /* * resched_task - mark a task 'to be rescheduled now'. @@ -829,6 +1075,11 @@ p->activated = -1; } +#ifdef CONFIG_RBED + //is sync only for SMP? + sync=0; +#endif + /* * Sync wakeups (i.e. those types of wakeups where the waker * has indicated that it will leave the CPU in short order) @@ -895,6 +1146,7 @@ * total amount of pending timeslices in the system doesn't change, * resulting in more scheduling fairness. */ +#ifndef CONFIG_RBED local_irq_disable(); p->time_slice = (current->time_slice + 1) >> 1; /* @@ -917,6 +1169,7 @@ preempt_enable(); } else local_irq_enable(); +#endif } /* @@ -925,6 +1178,7 @@ * This function will do some initial scheduler statistics housekeeping * that must be done for every newly created process. 
*/ +#ifndef CONFIG_RBED // normal linux void fastcall wake_up_forked_process(task_t * p) { unsigned long flags; @@ -959,6 +1213,26 @@ } task_rq_unlock(rq, &flags); } +#else +void fastcall wake_up_forked_process(task_t * p) +{ + unsigned long flags; + runqueue_t *rq = task_rq_lock(current, &flags); + + BUG_ON(p->state != TASK_RUNNING); + + set_task_cpu(p, smp_processor_id()); + + /* note that in sched_init, even the idle task will be + created as a bebs task, but then a later call to + init_idle will put its priority back to normal */ + init_be_task(p, task_timeslice(p)); + + __activate_task(p, rq); + + task_rq_unlock(rq, &flags); +} +#endif /* * Potentially available exiting-child timeslices are @@ -991,6 +1265,17 @@ (EXIT_WEIGHT + 1) * EXIT_WEIGHT + p->sleep_avg / (EXIT_WEIGHT + 1); task_rq_unlock(rq, &flags); + +#ifdef CONFIG_RBED // normal linux + if(p->server && p->server != &bes){ + p->server->state = S_STOPPED; + p->server->p = NULL; + + /* if the server is already dequeued, delete it */ + if(!p->server->inqueue) + expire_server(p->server); + } +#endif } /** @@ -1978,6 +2263,7 @@ * It also gets called by the fork code, when changing the parent's * timeslices. 
*/
+#ifndef CONFIG_RBED // normal linux
 void scheduler_tick(int user_ticks, int sys_ticks)
 {
 	int cpu = smp_processor_id();
@@ -2092,6 +2378,57 @@
 out:
 	rebalance_tick(cpu, rq, NOT_IDLE);
 }
+#else // RBED: per-tick CPU-time accounting; no time_slice is decremented in this variant
+void scheduler_tick(int user_ticks, int sys_ticks)
+{
+	int cpu = smp_processor_id();
+	struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat;
+	runqueue_t *rq = this_rq();
+	task_t *p = current;
+
+	rq->timestamp_last_tick = sched_clock();
+
+	if (rcu_pending(cpu))
+		rcu_check_callbacks(cpu, user_ticks);
+
+	/* note: this timer irq context must be accounted for as well */
+	if (hardirq_count() - HARDIRQ_OFFSET) {
+		cpustat->irq += sys_ticks;
+		sys_ticks = 0;
+	} else if (softirq_count()) {
+		cpustat->softirq += sys_ticks;
+		sys_ticks = 0;
+	}
+
+	if (p == rq->idle) {
+		if (atomic_read(&rq->nr_iowait) > 0)
+			cpustat->iowait += sys_ticks;
+		else
+			cpustat->idle += sys_ticks;
+		rebalance_tick(cpu, rq, IDLE);
+		return;
+	}
+	if (TASK_NICE(p) > 0)
+		cpustat->nice += user_ticks;
+	else
+		cpustat->user += user_ticks;
+	cpustat->system += sys_ticks;
+
+	/* Task might have expired already, but not scheduled off yet */
+	if(!APIC_UP(apic_flag)){	/* this part runs only while APIC_UP(apic_flag) is false */
+		if(apic_flag >= 1){	/* NOTE(review): looks like a tick count-up towards APIC_UP() -- confirm */
+			apic_flag ++;
+		}
+		if (p->array != rq->active) {
+			//BUG_ON(1); // we never go here
+			set_tsk_need_resched(p);	/* p is not on the active array: request a reschedule */
+			goto out;
+		}
+	}
+out:
+	rebalance_tick(cpu, rq, NOT_IDLE);
+}
+#endif
 
 #ifdef CONFIG_SCHED_SMT
 static inline void wake_sleeping_dependent(int cpu, runqueue_t *rq)
@@ -2181,6 +2518,7 @@
 /*
  * schedule() is the main scheduler function.
  */
+#ifndef CONFIG_RBED // normal linux
 asmlinkage void __sched schedule(void)
 {
 	long *switch_count;
@@ -2316,7 +2654,519 @@
 	if (test_thread_flag(TIF_NEED_RESCHED))
 		goto need_resched;
 }
+#else // RBED
+/* back_donate(): offer slack to the servers on expired_server_head, then settle prev_server and back_donate_server.
+ * Back donation for BACKSLASH: careful with overhead!!! slack, normal_schedule and back_donate_server are defined outside this function. */
+static inline void back_donate(server_t * prev_server,
+		task_t *prev, runqueue_t *rq, long rt, unsigned long long now)
+{
+	/* Acts only for SCHED_BACKSLASH with expired servers present, normal_schedule set, and either a donor other than prev_server or prev idle */
+	/* We need to consider slack server!!! */
+	if(scheduling_policy == SCHED_BACKSLASH && nr_expired_server > 0 &&
+		normal_schedule && (back_donate_server!=prev_server || prev==rq->idle)){
+		struct list_head * s_ehd = &expired_server_head;
+		struct list_head * i;
+		long total_slack;
+		/* slack == 0 (prepare_oneshot_timer() zeroes it): seed it with rt; total_slack remembers the amount before donation */
+		if(!slack)
+			slack = rt;
+		total_slack = slack;
+		/* Offer the slack to each server on the expired list, in list order */
+		list_for_each(i, s_ehd)
+		{
+			server_t *s= list_entry(i, server_t, expired_server_list);
+			long available_slack;
+			/* deadline less than one period away: offer slack minus the time since (deadline - period), floored at 0 */
+			if((long long)(s->deadline - now) < (long long)s->period)
+				available_slack = max((long)0, slack - (long)(now - (s->deadline - s->period)));
+			else
+				available_slack = slack;
+
+			slack -=available_slack;
+			/* Can't exceed its budget */
+			if((long)s->left_time + available_slack > s->budget){
+				available_slack -= s->budget - s->left_time;
+				s->left_time = s->budget;
+				/* Dequeue after fully back donation */
+				i=i->prev;	/* step back first so list_for_each() can advance once s is dequeued (presumably unlinked) */
+				dequeue_expired_server(s);
+			}
+			else{
+				s->left_time += available_slack;
+				available_slack = 0;
+				/* Double check if still expired */
+				if((long long)(s->deadline - now)<=(long long)s->period ||
+					s->budget == s->left_time){
+					i=i->prev;
+					dequeue_expired_server(s);
+				}
+			}
+			slack +=available_slack;	/* put back whatever s did not absorb */
+			if(slack<=0)
+				break;
+		}
+
+		/* We have to be careful here!!! */
+		if(back_donate_server != prev_server){
+			prev_server->left_time += slack;	/* leftover slack goes to prev_server, capped at its budget */
+			if(prev_server->left_time > prev_server->budget)
+				prev_server->left_time = prev_server->budget;
+			/* The donor is charged total_slack and expired if that leaves it with nothing */
+			back_donate_server->left_time -= total_slack;
+			if(back_donate_server->left_time <= 0){
+				back_donate_server->left_time = 0;
+				expire_server(back_donate_server);
+			}
+		}
+	}
+}
+/* prepare_schedule(): schedule() calls this with prev_server == NULL; the argument is overwritten at process_current_server.
+ * It does time bookkeeping, does back donation, etc */
+static inline void prepare_schedule(server_t * prev_server,
+		task_t *prev, runqueue_t *rq, unsigned long long now)
+{
+	unsigned long long run_time;
+	long rt = 0;
+
+	/* If not slack scheduling, deal with the task's server right away */
+	if (prev->current_server == prev->server || (prev->server && !prev->server->inqueue))
+		goto process_current_server;
+
+	/* Process the task's server first */
+	if(prev->state && prev->server){
+		if(prev->server->state != S_STOPPED)
+			prev->server->state = S_SLACK;
+		prev->server->p = NULL;
+	}
+
+	/* If a real-time server is not valid for slack, expire it */
+	if(prev->server && prev->server != &bes){
+		switch (prev->server->policy){
+		case SCHED_BACKSLASH:
+			if (((long long)(prev->server->deadline - now) >=
+				(long long)prev->server->period) && prev->state){
+				//prev->server->p = prev; //!!!This caused kernel panic
+				expire_server(prev->server);
+
+				if(prev->server && !prev->server->inexpiredqueue &&
+					prev->server->budget > (long)prev->server->left_time)
+					enqueue_expired_server(prev->server);
+			}
+			break;
+		case SCHED_CBS:
+		case SCHED_EDF:
+			if (prev->state)
+				expire_server(prev->server);
+			break;
+		case SCHED_CASH:
+			if (prev->state)
+				prev->server->incashqueue = 1;
+			break;
+		}
+	}
+
+	/* From here on prev_server is the server prev actually ran on (prev->current_server) */
+process_current_server:
+	prev_server = prev->current_server;
+
+	/* Check if it's slack server (not stopped yet) */
+	if(prev==rq->idle || prev->state || prev_server != prev->server){
+		if(prev_server->state != S_STOPPED)
+			prev_server->state = S_SLACK;
+		if(prev->state)
+			prev_server->p = NULL;
+	}
+
+	/* Calculate the last run (elapsed) time */
+	run_time = (now - prev_server->timestamp);
+	rt = run_time;	/* NOTE(review): unsigned long long narrowed to long; a negative result is zeroed just below */
+	if(rt<0){
+		/* why is this possible? */
+		rt = 0;
+	}
+	prev_server->left_time -= rt;
+
+	/* Use real timestamp to calculate the elapsed time */
+	rt = (now - prev_server->rtimestamp);
+
+	/* If less than a context switch time, reset it to 0 */
+	if(prev_server->left_time < context_switch_in_ns)
+		prev_server->left_time = 0;
+
+	/* Start back slack donation */
+	back_donate(prev_server, prev, rq, rt, now);
+
+	/* If the current server uses up budget, recharge by expiring it */
+	if(prev_server->inqueue && prev_server->left_time <= 0){
+		expire_server(prev_server);
+	}
+	else if (prev_server != &bes) {
+		/* If the current RT server is not valid for slack, expire it */
+		task_t * served_p = prev_server->sid?find_task_by_pid(prev_server->sid) : NULL;
+		switch (prev_server->policy){
+		case SCHED_BACKSLASH:
+			/* RBED SLASH server idles only if there is no
+			 * pending task and server is extended at least
+			 * once already */
+			if (((long long)(prev_server->deadline - now)
+				>= (long long)(prev_server->period)) && (!served_p || served_p->state)){
+				expire_server(prev_server);
+
+				if(prev_server && !prev_server->inexpiredqueue &&
+					prev_server->budget > (long)prev_server->left_time)
+					enqueue_expired_server(prev_server);
+			}
+			break;
+		case SCHED_CBS:
+		case SCHED_EDF:
+			/* CBS server idles as soon as there is no pending task */
+			if(!served_p || served_p->state)
+				expire_server(prev_server);
+			break;
+		case SCHED_CASH:
+			/* CASH server can be always used as slack server:
+			 * put into cash queue */
+			if(!served_p || served_p->state)
+				prev_server->incashqueue = 1;
+			break;
+		}
+	}
+}
+
+
+/* It picks the server with earliest deadline and prepares back donation if
+ * necessary. If the first server has associated process, schedule it else
+ * picks the first process.
+ * We need to consider if a new process has early deadline
+ * Returns the server to run (never NULL for a non-NULL current_server); may update *laxity, normal_schedule and back_donate_server. */
+static inline server_t* edf_schedule_server (server_t *current_server,
+		runqueue_t *rq, unsigned long long now, long long *laxity)
+{
+	struct list_head * s_hd = &server_head;
+	server_t * valid_server = current_server;
+
+	/* If pure-EDF policy, skip expired servers */
+	if(scheduling_policy == SCHED_EDF && nr_server >1){
+		server_t * tmp_server = current_server;
+		valid_server = NULL;
+		while(tmp_server){
+			if(tmp_server == &bes ||
+				(long long)(tmp_server->deadline - now) < (long long)tmp_server->period){
+				/* Found a valid (un-expired) server */
+				if(!valid_server)
+					valid_server = tmp_server;
+				/* Once we found a valid server, we break out: added newly on 5/11/05 */
+				break;
+			}
+			else{ /* If there is expired SlAD server, get laxity */
+				long long this_laxity=tmp_server->deadline - now - tmp_server->period;
+				if (*laxity==-1 || *laxity > this_laxity){
+					*laxity = this_laxity;
+				}
+			}
+			/* Go to the next server */
+			s_hd = s_hd->next;
+			if(s_hd->next==&server_head)
+				break;
+			tmp_server = list_entry(s_hd->next, server_t, server_list);
+		}
+
+		if(!valid_server)
+			printk(KERN_ERR "ERROR: valid_server is null\n");
+	}
+	/* No valid server found: keep the caller's server, because schedule() dereferences the result */
+	if(scheduling_policy == SCHED_EDF && valid_server && current_server != valid_server){
+		if(valid_server == &bes)
+			valid_server->p = NULL;
+		current_server = valid_server;
+	}
+
+	/* This checks and prepares slack back donation */
+	normal_schedule = 0;
+	if(scheduling_policy == SCHED_BACKSLASH && nr_expired_server > 0){
+		task_t * cur_served_p = current_server->sid?find_task_by_pid(current_server->sid) : NULL;
+
+		/* If slack server/BEserver, normal schedule if necessary */
+		if( current_server != &bes && (!cur_served_p || cur_served_p->state)){
+			/* Dequeue entries that fail the re-check below; sum budget - left_time of the rest into 'borrowed' */
+			struct list_head * s_ehd = &expired_server_head;
+			struct list_head * i;
+			long borrowed = 0;
+			list_for_each(i, s_ehd)
+			{
+				server_t *s= list_entry(i, server_t, expired_server_list);
+				/* Double check if still expired */
+				if((long long)(s->deadline - now)<=(long long)s->period ||
+					s->budget == s->left_time){
+					i=i->prev;
+					dequeue_expired_server(s);
+					continue;
+				}
+
+				borrowed += s->budget - s->left_time;
+			}
+
+			/* If less than a context switch, skip */
+			if(borrowed > context_switch_in_ns){
+				server_t * normal_server = NULL;
+				normal_schedule = 1;
+				*laxity = current_server->left_time;
+				/* Why not use the following? */
+				/* laxity = min (borrowed, current_server->left_time); */
+				back_donate_server = current_server;
+				normal_server = get_normal_server();
+				if(normal_server)
+					current_server = normal_server;
+			}
+		}
+	}
+
+	return current_server;
+}
+
+/* Pick the task to run for current_server from the run list 'queue'; returns rq->idle when nothing qualifies */
+static inline task_t *edf_schedule_task( server_t *current_server,
+		runqueue_t *rq, struct list_head *queue, long long laxity)
+{
+	task_t* next;
+
+	//Start slack schedule: separate operation on bes and rt servers
+	//1. If bes, go through the runqueue, find the first be task
+	//2. If rt server, get the first task in the runqueue
+	next = rq->idle;
+	if(current_server == &bes && rq->nr_running - rq->nr_running_rt > 0 && laxity==-1){
+		struct list_head *i;
+
+		/* Note: we can simplify this search by providing a be head */
+		list_for_each(i, queue)
+		{
+			task_t *t= list_entry(i, task_t, run_list);
+			/* 'deadline==-1' indicates it's a be task */
+			if((long long)t->edf_deadline == -1 && t!=rq->idle){
+				next = t;
+				break;
+			}
+		}
+	}
+	else{	/* otherwise: the queued task with the smallest edf_deadline */
+		struct list_head *i;
+		list_for_each(i, queue)
+		{
+			task_t *t= list_entry(i, task_t, run_list);
+			if(next==rq->idle){
+				next = t;
+				continue;
+			}
+
+			if(t->edf_deadline < next->edf_deadline)
+				next = t;
+		}
+	}
+	return next;
+}
+
+/* Stamp current_server, bind it to 'next', set one_shot_time to left_time (or to a smaller positive laxity) and call apic_start() */
+static inline void prepare_oneshot_timer(server_t *current_server, task_t *next,
+		unsigned long long now, unsigned long long now_in_tsc, long long laxity)
+{
+	current_server->timestamp = now;
+	current_server->rtimestamp = now;
+	current_server->p = next;
+
+	next->current_server = current_server;
+
+	current_server->one_shot_time = current_server->left_time;
+	laxity_schedule = 0;
+	/* Check if a RBED server has an earlier reschedule point */
+	if(laxity > 0 && laxity < current_server->one_shot_time){
+		current_server->one_shot_time = laxity;
+		laxity_schedule = 1;
+	}
+	slack = 0;
+
+	/* Start the oneshot timer over */
+	apic_start(current_server, now_in_tsc);
+}
+/* Add the time spent since 'now' to the overhead counters; counts only under the four RBED policies */
+static inline void measure_overhead(unsigned long long now, runqueue_t *rq)
+{
+	/* Measure the scheduler overhead, output in expire_server() in rbed.c */
+	if (scheduling_policy == SCHED_BACKSLASH ||
+		scheduling_policy == SCHED_CBS ||
+		scheduling_policy == SCHED_EDF ||
+		scheduling_policy == SCHED_CASH ){
+		schedule_count++;
+		total_schedule_time += (sched_clock() - now);
+		if(!schedule_context_count)
+			schedule_context_count = rq->nr_switches;
+	}
+}
+
+/* RBED schedule(): with APIC_UP() it settles prev's server, picks a server and then a task, and restarts the one-shot timer */
+asmlinkage void __sched schedule(void)
+{
+	long *switch_count;
+	task_t *prev, *next;
+	runqueue_t *rq;
+	prio_array_t *array;
+	struct list_head *queue;
+	unsigned long long now, now_in_tsc;
+	server_t * prev_server, * current_server;
+	long long counter, laxity = -1;	/* -1 means "no laxity" (tested in edf_schedule_server()); defined even if the first APIC_UP() branch is skipped */
+	int cpu;
+
+	/*
+	 * Test if we are atomic. Since do_exit() needs to call into
+	 * schedule() atomically, we ignore that path for now.
+	 * Otherwise, whine if we are scheduling when we should not be.
+	 */
+	if (likely(!(current->state & (TASK_DEAD | TASK_ZOMBIE)))) {
+		if (unlikely(in_atomic())) {
+			printk(KERN_ERR "bad: scheduling while atomic!\n");
+			dump_stack();
+			BUG_ON(1);
+		}
+	}
+
+need_resched:
+	preempt_disable();
+	prev = current;
+	rq = this_rq();
+
+	release_kernel_lock(prev);
+	now = sched_clock();
+	rdtscll(now_in_tsc);
+	/* Apic timer is up */
+	if(APIC_UP(apic_flag)){
+		/* Stop the one-shot timer now: too much overhead? */
+		apic_over(prev->current_server);
+		prev_server=NULL;
+		current_server=NULL;
+		laxity = -1;
+
+		/* First time, apic timer not ready yet */
+		if(apic_flag == APIC_DELAY){
+			apic_flag = APIC_DELAY + 1;
+			/* Wake up the best-effort (be) server */
+			if(!bes.inqueue)
+				wake_up_server(&bes);
+			prev_server = NULL;
+		}
+		/* Apic timer is ready to use*/
+		else if (prev->current_server)
+			prepare_schedule(prev_server, prev, rq, now);	/* prev_server is NULL here; the callee reloads it from prev */
+	}
+	else{
+		/* Update the prev tasks usage counters */
+		if(prev != rq->idle) {
+			unsigned long run_time = (now - prev->timestamp);	/* NOTE(review): narrowed from unsigned long long */
+			run_time /= 1000; // convert from ns to us
+			counter = prev->time_slice - run_time;
+			if( counter <= 0 ) {
+				counter = 0;
+			}
+			prev->time_slice = counter;
+		}
+	}
+
+	spin_lock_irq(&rq->lock);
+
+	/*
+	 * If entering off of a kernel preemption go straight
+	 * to picking the next task.
+	 */
+	switch_count = &prev->nivcsw;
+	if (prev->state && !(preempt_count() & PREEMPT_ACTIVE)) {
+		switch_count = &prev->nvcsw;
+		if (unlikely((prev->state & TASK_INTERRUPTIBLE) &&
+				unlikely(signal_pending(prev)))){
+			prev->state = TASK_RUNNING;
+			if (prev->server)	/* prepare_schedule() also treats prev->server as possibly NULL */
+				prev->server->p = prev;
+			printk("Why am I here?\n");
+		}
+		else
+			deactivate_task(prev, rq);
+	}
+
+	/* Pick the server with earliest deadline to use CPU */
+	if(APIC_UP(apic_flag)){
+		struct list_head * s_hd = &server_head;
+		current_server = list_entry(s_hd->next, server_t, server_list);
+		current_server = edf_schedule_server(current_server, rq, now, &laxity);
+
+		if(current_server->p && current_server->p != rq->idle &&
+			!current_server->p->state &&
+			!(current_server->policy == SCHED_CASH &&
+			current_server->incashqueue)){
+			next = current_server->p;
+			goto switch_tasks;
+		}
+	}
+
+	cpu = smp_processor_id();	/* dependent_sleeper() below takes the cpu id */
+	if (unlikely(!rq->nr_running)) {
+#ifdef CONFIG_SMP
+		load_balance(rq, 1, cpu_to_node_mask(smp_processor_id()));
+#endif
+		if (unlikely(!rq->nr_running)) {
+			next = rq->idle;
+			rq->expired_timestamp = 0;
+			goto switch_tasks;
+		}
+	}
+
+	array = rq->active;
+	queue = array->queue;
+
+	/* Pick the task with earliest deadline to run on CPU */
+	if(APIC_UP(apic_flag))
+		next = edf_schedule_task(current_server, rq, queue, laxity);
+	else
+		next = list_entry(queue->next, task_t, run_list);
+
+	/* What is this? -- NOTE(review): presumably the stock CONFIG_SCHED_SMT sibling check; confirm */
+	if (dependent_sleeper(cpu, rq, next)) {
+		next = rq->idle;
+		goto switch_tasks;
+	}
+
+switch_tasks:
+	prefetch(next);
+	clear_tsk_need_resched(prev);
+	RCU_qsctr(task_cpu(prev))++;
+
+	/* Starts the oneshot apic timer */
+	if(APIC_UP(apic_flag)){
+		prepare_oneshot_timer(current_server, next, now, now_in_tsc, laxity);
+		measure_overhead(now, rq);
+	}
+
+	prev->timestamp = now;
+
+	if (likely(prev != next)) {
+		next->timestamp = now;
+		rq->nr_switches++;
+		rq->curr = next;
+		++*switch_count;
+
+		prepare_arch_switch(rq, next);
+		prev = context_switch(rq, prev, next);
+		barrier();
+
+		finish_task_switch(prev);
+	} else{
+		spin_unlock_irq(&rq->lock);
+	}
+
+
+	reacquire_kernel_lock(current);
+	preempt_enable_no_resched();
+	if (test_thread_flag(TIF_NEED_RESCHED))
+		goto need_resched;
+}
+#endif
 
 EXPORT_SYMBOL(schedule);
 
 #ifdef CONFIG_PREEMPT
@@ -2709,6 +3559,7 @@
 /*
  * setscheduler - change the scheduling policy and/or RT priority of a thread.
  */
+#ifndef CONFIG_RBED
 static int setscheduler(pid_t pid, int policy, struct sched_param __user *param)
 {
 	struct sched_param lp;
@@ -2802,6 +3653,111 @@
 out_nounlock:
 	return retval;
 }
+#else // RBED: copies a struct rbed_sched_param from 'param'; accepts SCHED_BACKSLASH/CBS/EDF/CASH/NORMAL, or policy < 0 to keep p->policy
+static int setscheduler(pid_t pid, int policy, struct sched_param __user *param)
+{
+	struct rbed_sched_param lp;
+	int retval = -EINVAL;
+	prio_array_t *array;
+	unsigned long flags;
+	runqueue_t *rq;
+	task_t *p;
+	server_t *s = NULL;
+
+	if (!param || pid < 0)
+		goto out_nounlock;
+
+	retval = -EFAULT;
+	if (copy_from_user(&lp, param, sizeof(struct rbed_sched_param)))
+		goto out_nounlock;
+
+	/*
+	 * We play safe to avoid deadlocks.
+	 */
+	read_lock_irq(&tasklist_lock);
+
+	p = find_process_by_pid(pid);
+
+	retval = -ESRCH;
+	if (!p)
+		goto out_unlock_tasklist;
+
+	/*
+	 * To be able to change p->policy safely, the appropriate
+	 * runqueue lock must be held.
+	 */
+	rq = task_rq_lock(p, &flags);
+
+	if (policy < 0)
+		policy = p->policy;
+	else {
+		retval = -EINVAL;
+		if (policy != SCHED_BACKSLASH &&
+				policy != SCHED_CBS &&
+				policy != SCHED_EDF &&
+				policy != SCHED_CASH &&
+				policy != SCHED_NORMAL)
+			goto out_unlock;
+	}
+
+	/* As in the stock setscheduler(): only the task's owner or a CAP_SYS_NICE holder may change it */
+	retval = -EPERM;
+	if ((current->euid != p->euid) && (current->euid != p->uid) &&
+			!capable(CAP_SYS_NICE))
+		goto out_unlock;
+
+	array = p->array;
+	if (array)
+		deactivate_task(p, task_rq(p));
+	retval = 0;
+
+	/* for now it must be this policy */
+	if (policy == SCHED_BACKSLASH ||
+			policy == SCHED_CBS ||
+			policy == SCHED_EDF ||
+			policy == SCHED_CASH)
+	{
+		p->policy = policy;
+		retval = set_rt_task(p,s,lp);	/* NOTE(review): on failure p->policy is already switched and p is re-queued via __activate_task() */
+	}
+
+	__setscheduler(p, policy, p->static_prio);
+	if (array) {
+		if ((policy == SCHED_BACKSLASH || policy == SCHED_CBS ||
+			policy == SCHED_EDF || policy == SCHED_CASH) && retval>=0){
+			/* enqueue the real-time task and its server (the server enqueue is done in __activate_task_rt()) */
+			__activate_task_rt(p, task_rq(p));
+			/* since bes parameter was changed in set_rt_task(),
+			 * we need to re-insert it to the server list */
+			resort_server(&bes);
+
+			scheduling_policy = policy;
+
+			/* initialize overhead variables */
+			print_overhead = 1;
+			schedule_count = 0;
+			total_schedule_time = 0;
+			schedule_context_count = 0;
+		}
+		else
+			__activate_task(p, task_rq(p));
+		/* Do we need reschedule here? */
+		if (rq->curr == p) {
+			resched_task(rq->curr);
+		}
+		/* otherwise, release job already set the preempt flag */
+	}
+
+out_unlock:
+	task_rq_unlock(rq, &flags);
+out_unlock_tasklist:
+	read_unlock_irq(&tasklist_lock);
+
+out_nounlock:
+	return retval;
+}
+#endif
+
 
 /**
  * sys_sched_setscheduler - set/change the scheduler policy and RT priority
@@ -3002,6 +3958,7 @@
  * to the expired array. If there are no other threads running on this
  * CPU then this function will return.
*/
+#ifndef CONFIG_RBED // normal linux
 asmlinkage long sys_sched_yield(void)
 {
 	runqueue_t *rq = this_rq_lock();
@@ -3032,6 +3989,21 @@
 
 	return 0;
 }
+#else // RBED: yield only drops the runqueue lock and calls schedule(); the task is not requeued here
+asmlinkage long sys_sched_yield(void)
+{
+	runqueue_t *rq = this_rq_lock();
+
+	//current->state |= TASK_EXPIRED;
+	/* unlock without a reschedule check of its own: schedule() is called right below */
+	_raw_spin_unlock(&rq->lock);
+	preempt_enable_no_resched();
+
+	schedule();
+
+	return 0;	/* always reports success */
+}
+#endif
 
 void __sched __cond_resched(void)
 {
@@ -3907,6 +4879,7 @@
 		&& addr < (unsigned long)__sched_text_end;
 }
 
+#ifndef CONFIG_RBED // normal linux
 void __init sched_init(void)
 {
 	runqueue_t *rq;
@@ -3930,6 +4903,7 @@
 	sched_group_init.cpu_power = SCHED_LOAD_SCALE;
 #endif
 
+
 	for (i = 0; i < NR_CPUS; i++) {
 		prio_array_t *array;
 
@@ -3975,6 +4949,49 @@
 	atomic_inc(&init_mm.mm_count);
 	enter_lazy_tlb(&init_mm, current);
 }
+#else // RBED: set up every runqueue, make the booting thread the idle task, then call rbed_init()
+void __init sched_init(void)
+{
+	runqueue_t *rq;
+	int i, j;
+	/* Per-CPU runqueue set-up: lock, active/expired array pointers, iowait counter, queue heads */
+	for (i = 0; i < NR_CPUS; i++) {
+		prio_array_t *array;
+
+		rq = cpu_rq(i);
+		spin_lock_init(&rq->lock);
+		rq->active = rq->arrays;
+		rq->expired = rq->arrays + 1;
+		rq->best_expired_prio = MAX_PRIO;
+
+		atomic_set(&rq->nr_iowait, 0);
+
+		for (j = 0; j < 2; j++) {
+			array = rq->arrays + j;
+			INIT_LIST_HEAD(array->queue);	/* a single list head per array is initialised here */
+		}
+	}
+
+	/*
+	 * We have to do a little magic to get the first
+	 * thread right in SMP mode.
+	 */
+	rq = this_rq();
+	rq->curr = current;
+	rq->idle = current;
+	set_task_cpu(current, smp_processor_id());
+	/* rbed_init() is defined outside this chunk; it is handed the idle task set just above */
+	rbed_init( rq->idle );
+
+	wake_up_forked_process(current);
+
+	/*
+	 * The boot idle thread does lazy MMU switching as well:
+	 */
+	atomic_inc(&init_mm.mm_count);
+	enter_lazy_tlb(&init_mm, current);
+}
+#endif
 
 #ifdef CONFIG_DEBUG_SPINLOCK_SLEEP
 void __might_sleep(char *file, int line)