/**
 * acpi_idle_enter_bm - enter a C3-type idle state with bus-master handling
 * @dev: cpuidle device of the CPU going idle
 * @drv: cpuidle driver that owns the idle state table
 * @index: index of the idle state that was requested
 *
 * When bus-master activity is detected (and the target state does not ask for
 * that check to be skipped), the driver's safe state is entered instead; if
 * the driver has no safe state the CPU is halted via acpi_safe_halt().
 *
 * Return: @index after the requested state has been entered and left, the
 * return value of the safe state's ->enter() when that state was used,
 * -EBUSY after the safe-halt fallback, or -EINVAL when this CPU has no
 * processor object or a reschedule is already pending.
 */
static int acpi_idle_enter_bm(struct cpuidle_device *dev,
		struct cpuidle_driver *drv, int index)
{
	struct acpi_processor_cx *target_cx = per_cpu(acpi_cstate[index], dev->cpu);
	struct acpi_processor *proc = __this_cpu_read(processors);

	if (unlikely(!proc))
		return -EINVAL;

	/* Bus-master activity: stay out of the requested state. */
	if (!target_cx->bm_sts_skip && acpi_idle_bm_check()) {
		if (drv->safe_state_index < 0) {
			acpi_safe_halt();
			return -EBUSY;
		}

		return drv->states[drv->safe_state_index].enter(dev,
				drv, drv->safe_state_index);
	}

	if (target_cx->entry_method != ACPI_CSTATE_FFH) {
		current_thread_info()->status &= ~TS_POLLING;

		/*
		 * The cleared TS_POLLING flag has to be visible before
		 * NEED_RESCHED is tested.
		 */
		smp_mb();

		if (unlikely(need_resched())) {
			/* Work is pending: undo the flag change and bail out. */
			current_thread_info()->status |= TS_POLLING;
			return -EINVAL;
		}
	}

	acpi_unlazy_tlb(smp_processor_id());

	/* Let the scheduler clock know we are about to go deep-idle. */
	sched_clock_idle_sleep_event();

	/*
	 * This has to happen before bus mastering is disabled because the
	 * HPET may need to be accessed.
	 */
	lapic_timer_state_broadcast(proc, target_cx, 1);

	/*
	 * Bus-master handling before entry:
	 *   !bm_check              -> flush the CPU cache
	 *   bm_check && bm_control -> ARB_DIS can be used
	 *   bm_check && !bm_control -> nothing can be done, enter as-is
	 */
	if (!proc->flags.bm_check) {
		ACPI_FLUSH_CPU_CACHE();
	} else if (proc->flags.bm_control) {
		raw_spin_lock(&c3_lock);
		c3_cpu_count++;
		/* Arbitration is only disabled once every online CPU is in C3. */
		if (c3_cpu_count == num_online_cpus())
			acpi_write_bit_register(ACPI_BITREG_ARB_DISABLE, 1);
		raw_spin_unlock(&c3_lock);
	}

	acpi_idle_do_entry(target_cx);

	/* Back from idle: turn bus-master arbitration on again. */
	if (proc->flags.bm_check && proc->flags.bm_control) {
		raw_spin_lock(&c3_lock);
		acpi_write_bit_register(ACPI_BITREG_ARB_DISABLE, 0);
		c3_cpu_count--;
		raw_spin_unlock(&c3_lock);
	}

	sched_clock_idle_wakeup_event(0);

	if (target_cx->entry_method != ACPI_CSTATE_FFH)
		current_thread_info()->status |= TS_POLLING;

	lapic_timer_state_broadcast(proc, target_cx, 0);

	return index;
}
/**
* cppc_set_perf - Set a CPUs performance controls.
* @cpu: CPU for which to set performance controls.
* @perf_ctrls: ptr to cppc_perf_ctrls. See cppc_acpi.h
*
* Return: 0 for success, -ERRNO otherwise.
*/
int cppc_set_perf(int cpu, struct cppc_perf_ctrls *perf_ctrls)
{
struct cpc_desc *cpc_desc = per_cpu(cpc_desc_ptr, cpu);
struct cpc_register_resource *desired_reg;
int pcc_ss_id = per_cpu(cpu_pcc_subspace_idx, cpu);
struct cppc_pcc_data *pcc_ss_data = NULL;
int ret = 0;
if (!cpc_desc) {
pr_debug("No CPC descriptor for CPU:%d\n", cpu);
return -ENODEV;
}
desired_reg = &cpc_desc->cpc_regs[DESIRED_PERF];
/*
* This is Phase-I where we want to write to CPC registers
* -> We want all CPUs to be able to execute this phase in parallel
*
* Since read_lock can be acquired by multiple CPUs simultaneously we
* achieve that goal here
*/
if (CPC_IN_PCC(desired_reg)) {
if (pcc_ss_id < 0) {
pr_debug("Invalid pcc_ss_id\n");
return -ENODEV;
}
pcc_ss_data = pcc_data[pcc_ss_id];
down_read(&pcc_ss_data->pcc_lock); /* BEGIN Phase-I */
if (pcc_ss_data->platform_owns_pcc) {
ret = check_pcc_chan(pcc_ss_id, false);
if (ret) {
up_read(&pcc_ss_data->pcc_lock);
return ret;
}
}
/*
* Update the pending_write to make sure a PCC CMD_READ will not
* arrive and steal the channel during the switch to write lock
*/
pcc_ss_data->pending_pcc_write_cmd = true;
cpc_desc->write_cmd_id = pcc_ss_data->pcc_write_cnt;
cpc_desc->write_cmd_status = 0;
}
/*
* Skip writing MIN/MAX until Linux knows how to come up with
* useful values.
*/
cpc_write(cpu, desired_reg, perf_ctrls->desired_perf);
if (CPC_IN_PCC(desired_reg))
up_read(&pcc_ss_data->pcc_lock); /* END Phase-I */
/*
* This is Phase-II where we transfer the ownership of PCC to Platform
*
* Short Summary: Basically if we think of a group of cppc_set_perf
* requests that happened in short overlapping interval. The last CPU to
* come out of Phase-I will enter Phase-II and ring the doorbell.
*
* We have the following requirements for Phase-II:
* 1. We want to execute Phase-II only when there are no CPUs
* currently executing in Phase-I
* 2. Once we start Phase-II we want to avoid all other CPUs from
* entering Phase-I.
* 3. We want only one CPU among all those who went through Phase-I
* to run phase-II
*
* If write_trylock fails to get the lock and doesn't transfer the
* PCC ownership to the platform, then one of the following will be TRUE
* 1. There is at-least one CPU in Phase-I which will later execute
* write_trylock, so the CPUs in Phase-I will be responsible for
* executing the Phase-II.
* 2. Some other CPU has beaten this CPU to successfully execute the
* write_trylock and has already acquired the write_lock. We know for a
* fact it(other CPU acquiring the write_lock) couldn't have happened
* before this CPU's Phase-I as we held the read_lock.
* 3. Some other CPU executing pcc CMD_READ has stolen the
* down_write, in which case, send_pcc_cmd will check for pending
* CMD_WRITE commands by checking the pending_pcc_write_cmd.
* So this CPU can be certain that its request will be delivered
* So in all cases, this CPU knows that its request will be delivered
* by another CPU and can return
*
* After getting the down_write we still need to check for
* pending_pcc_write_cmd to take care of the following scenario
* The thread running this code could be scheduled out between
* Phase-I and Phase-II. Before it is scheduled back on, another CPU
* could have delivered the request to Platform by triggering the
* doorbell and transferred the ownership of PCC to platform. So this
* avoids triggering an unnecessary doorbell and more importantly before
* triggering the doorbell it makes sure that the PCC channel ownership
* is still with OSPM.
//......... part of the code is omitted here .........
/**
* acpi_get_psd_map - Map the CPUs in a common freq domain.
* @all_cpu_data: Ptrs to CPU specific CPPC data including PSD info.
*
* Return: 0 for success or negative value for err.
*/
int acpi_get_psd_map(struct cppc_cpudata **all_cpu_data)
{
int count_target;
int retval = 0;
unsigned int i, j;
cpumask_var_t covered_cpus;
struct cppc_cpudata *pr, *match_pr;
struct acpi_psd_package *pdomain;
struct acpi_psd_package *match_pdomain;
struct cpc_desc *cpc_ptr, *match_cpc_ptr;
if (!zalloc_cpumask_var(&covered_cpus, GFP_KERNEL))
return -ENOMEM;
/*
* Now that we have _PSD data from all CPUs, lets setup P-state
* domain info.
*/
for_each_possible_cpu(i) {
pr = all_cpu_data[i];
if (!pr)
continue;
if (cpumask_test_cpu(i, covered_cpus))
continue;
cpc_ptr = per_cpu(cpc_desc_ptr, i);
if (!cpc_ptr) {
retval = -EFAULT;
goto err_ret;
}
pdomain = &(cpc_ptr->domain_info);
cpumask_set_cpu(i, pr->shared_cpu_map);
cpumask_set_cpu(i, covered_cpus);
if (pdomain->num_processors <= 1)
continue;
/* Validate the Domain info */
count_target = pdomain->num_processors;
if (pdomain->coord_type == DOMAIN_COORD_TYPE_SW_ALL)
pr->shared_type = CPUFREQ_SHARED_TYPE_ALL;
else if (pdomain->coord_type == DOMAIN_COORD_TYPE_HW_ALL)
pr->shared_type = CPUFREQ_SHARED_TYPE_HW;
else if (pdomain->coord_type == DOMAIN_COORD_TYPE_SW_ANY)
pr->shared_type = CPUFREQ_SHARED_TYPE_ANY;
for_each_possible_cpu(j) {
if (i == j)
continue;
match_cpc_ptr = per_cpu(cpc_desc_ptr, j);
if (!match_cpc_ptr) {
retval = -EFAULT;
goto err_ret;
}
match_pdomain = &(match_cpc_ptr->domain_info);
if (match_pdomain->domain != pdomain->domain)
continue;
/* Here i and j are in the same domain */
if (match_pdomain->num_processors != count_target) {
retval = -EFAULT;
goto err_ret;
}
if (pdomain->coord_type != match_pdomain->coord_type) {
retval = -EFAULT;
goto err_ret;
}
cpumask_set_cpu(j, covered_cpus);
cpumask_set_cpu(j, pr->shared_cpu_map);
}
for_each_possible_cpu(j) {
if (i == j)
continue;
match_pr = all_cpu_data[j];
if (!match_pr)
continue;
match_cpc_ptr = per_cpu(cpc_desc_ptr, j);
if (!match_cpc_ptr) {
retval = -EFAULT;
goto err_ret;
}
match_pdomain = &(match_cpc_ptr->domain_info);
if (match_pdomain->domain != pdomain->domain)
continue;
//......... part of the code is omitted here .........
/**
 * omap4_enter_lowpower: OMAP4 MPUSS Low Power Entry Function
 * The purpose of this function is to manage low power programming
 * of OMAP4 MPUSS subsystem
 * @cpu : CPU ID
 * @power_state: Low power state.
 *
 * MPUSS states for the context save:
 * save_state =
 *	0 - Nothing lost and no need to save: MPUSS INACTIVE
 *	1 - CPUx L1 and logic lost: MPUSS CSWR
 *	2 - CPUx L1 and logic lost + GIC lost: MPUSS OSWR
 *	3 - CPUx L1 and logic lost + GIC + L2 lost: DEVICE OFF
 *
 * Return: 0 once the low power sequence has run, -ENXIO on OMAP4430 ES1.0
 * or when @power_state is not a supported CPU power state.
 */
int omap4_enter_lowpower(unsigned int cpu, unsigned int power_state)
{
	struct omap4_cpu_pm_info *pm_info = &per_cpu(omap4_pm_info, cpu);
	unsigned int save_state = 0, cpu_logic_state = PWRDM_POWER_RET;

	if (omap_rev() == OMAP4430_REV_ES1_0)
		return -ENXIO;

	switch (power_state) {
	case PWRDM_POWER_ON:
	case PWRDM_POWER_INACTIVE:
		save_state = 0;
		break;
	case PWRDM_POWER_OFF:
		cpu_logic_state = PWRDM_POWER_OFF;
		save_state = 1;
		break;
	case PWRDM_POWER_RET:
		/* RET is only accepted when the OSWR-disable erratum applies. */
		if (IS_PM44XX_ERRATUM(PM_OMAP4_CPU_OSWR_DISABLE)) {
			save_state = 0;
			break;
		}
		/* fall through */
	default:
		/*
		 * CPUx CSWR is invalid hardware state. Also CPUx OSWR
		 * doesn't make much sense, since logic is lost and $L1
		 * needs to be cleaned because of coherency. This makes
		 * CPUx OSWR equivalent to CPUX OFF and hence not supported
		 */
		WARN_ON(1);
		return -ENXIO;
	}

	pwrdm_pre_transition(NULL);

	/*
	 * Check MPUSS next state and save interrupt controller if needed.
	 * In MPUSS OSWR or device OFF, interrupt controller context is lost.
	 */
	mpuss_clear_prev_logic_pwrst();
	if ((pwrdm_read_next_pwrst(mpuss_pd) == PWRDM_POWER_RET) &&
	    (pwrdm_read_logic_retst(mpuss_pd) == PWRDM_POWER_OFF))
		save_state = 2;

	cpu_clear_prev_logic_pwrst(cpu);
	pwrdm_set_next_pwrst(pm_info->pwrdm, power_state);
	pwrdm_set_logic_retst(pm_info->pwrdm, cpu_logic_state);
	set_cpu_wakeup_addr(cpu, virt_to_phys(omap_pm_ops.resume));
	omap_pm_ops.scu_prepare(cpu, power_state);
	l2x0_pwrst_prepare(cpu, save_state);

	/*
	 * Call low level function with targeted low power state.
	 * A non-zero save_state goes through cpu_suspend(); otherwise the
	 * finish_suspend hook is called directly.
	 */
	if (save_state)
		cpu_suspend(save_state, omap_pm_ops.finish_suspend);
	else
		omap_pm_ops.finish_suspend(save_state);

	/* The GIC distributor is re-enabled only for non-zero @cpu. */
	if (IS_PM44XX_ERRATUM(PM_OMAP4_ROM_SMP_BOOT_ERRATUM_GICD) && cpu)
		gic_dist_enable();

	/*
	 * Restore the CPUx power state to ON otherwise CPUx
	 * power domain can transition to the programmed low power
	 * state while doing WFI outside the low power code. On
	 * secure devices, CPUx does WFI which can result in
	 * domain transition
	 */
	pwrdm_set_next_pwrst(pm_info->pwrdm, PWRDM_POWER_ON);

	pwrdm_post_transition(NULL);

	return 0;
}
/*
 * Every sampling_rate, we check, if current idle time is less than 20%
 * (default), then we try to increase frequency. Every sampling_rate, we look
 * for the lowest frequency which can sustain the load while keeping idle time
 * over 30%. If such a frequency exist, we try to decrease to this frequency.
 *
 * Any frequency increase takes it to the maximum frequency. Frequency reduction
 * happens at minimum steps of 5% (default) of current frequency
 *
 * @cpu: CPU whose per-CPU od_cpu_dbs_info entry (and its current policy) is used
 * @load_freq: load figure that is compared against <threshold> * policy->cur
 *             (presumably load percentage scaled by frequency; confirm with
 *             the caller)
 *
 * Two build variants exist, selected by CONFIG_ARCH_HI6XXX; they differ only
 * in which tunables are used as thresholds and in how the increased target
 * frequency is chosen.
 */
static void od_check_cpu(int cpu, unsigned int load_freq)
{
struct od_cpu_dbs_info_s *dbs_info = &per_cpu(od_cpu_dbs_info, cpu);
struct cpufreq_policy *policy = dbs_info->cdbs.cur_policy;
struct dbs_data *dbs_data = policy->governor_data;
struct od_dbs_tuners *od_tuners = dbs_data->tuners;
dbs_info->freq_lo = 0;
/* Check for frequency increase */
#ifdef CONFIG_ARCH_HI6XXX
/*
 * HI6XXX variant: an increase is triggered by od_6xxx_up_threshold. Above
 * up_threshold as well, the target is policy->max; otherwise it is
 * load_freq / od_6xxx_up_threshold.
 */
if(load_freq > od_tuners->od_6xxx_up_threshold * policy->cur) {
unsigned int freq_next;
/* If increase speed, apply sampling_down_factor */
if (policy->cur < policy->max)
dbs_info->rate_mult =
od_tuners->sampling_down_factor;
if (load_freq > od_tuners->up_threshold * policy->cur)
freq_next = policy->max;
else
freq_next = load_freq / od_tuners->od_6xxx_up_threshold;
dbs_freq_increase(policy, freq_next);
return;
}
#else
/* Generic variant: above up_threshold, go straight to policy->max. */
if (load_freq > od_tuners->up_threshold * policy->cur) {
/* If switching to max speed, apply sampling_down_factor */
if (policy->cur < policy->max)
dbs_info->rate_mult =
od_tuners->sampling_down_factor;
dbs_freq_increase(policy, policy->max);
return;
}
#endif
/* Check for frequency decrease */
/* if we cannot reduce the frequency anymore, break out early */
if (policy->cur == policy->min)
return;
/*
 * The optimal frequency is the frequency that is the lowest that can
 * support the current CPU usage without triggering the up policy. To be
 * safe, we focus 10 points under the threshold.
 */
/*
 * Each preprocessor branch below opens the same "if (...) {" block and
 * computes freq_next; the shared code after #endif runs inside that block
 * and the block is closed by the first closing brace at the end of the
 * function.
 */
#ifdef CONFIG_ARCH_HI6XXX
if (load_freq < od_tuners->od_6xxx_down_threshold
* policy->cur) {
unsigned int freq_next;
freq_next = load_freq / od_tuners->od_6xxx_down_threshold;
#else
if (load_freq < od_tuners->adj_up_threshold
* policy->cur) {
unsigned int freq_next;
freq_next = load_freq / od_tuners->adj_up_threshold;
#endif
/* No longer fully busy, reset rate_mult */
dbs_info->rate_mult = 1;
/* Never request less than the policy minimum. */
if (freq_next < policy->min)
freq_next = policy->min;
/* Without a powersave bias, request the computed frequency directly. */
if (!od_tuners->powersave_bias) {
__cpufreq_driver_target(policy, freq_next,
CPUFREQ_RELATION_L);
return;
}
/* Otherwise let the powersave-bias hook adjust the target first. */
freq_next = od_ops.powersave_bias_target(policy, freq_next,
CPUFREQ_RELATION_L);
__cpufreq_driver_target(policy, freq_next, CPUFREQ_RELATION_L);
}
}
static void od_dbs_timer(struct work_struct *work)
{
struct od_cpu_dbs_info_s *dbs_info =
container_of(work, struct od_cpu_dbs_info_s, cdbs.work.work);
unsigned int cpu = dbs_info->cdbs.cur_policy->cpu;
struct od_cpu_dbs_info_s *core_dbs_info = &per_cpu(od_cpu_dbs_info,
cpu);
struct dbs_data *dbs_data = dbs_info->cdbs.cur_policy->governor_data;
struct od_dbs_tuners *od_tuners = dbs_data->tuners;
int delay = 0, sample_type = core_dbs_info->sample_type;
bool modify_all = true;
mutex_lock(&core_dbs_info->cdbs.timer_mutex);
if (!need_load_eval(&core_dbs_info->cdbs, od_tuners->sampling_rate)) {
modify_all = false;
//......... part of the code is omitted here .........
/*
 * switch_to(x,y) should switch tasks from x to y.
 *
 * We fsave/fwait so that an exception goes off at the right time
 * (as a call from the fsave or fwait in effect) rather than to
 * the wrong process. Lazy FP saving no longer makes any sense
 * with modern CPU's, and this simplifies a lot of things (SMP
 * and UP become the same).
 *
 * NOTE! We used to use the x86 hardware context switching. The
 * reason for not using it any more becomes apparent when you
 * try to recover gracefully from saved state that is no longer
 * valid (stale segment register values in particular). With the
 * hardware task-switch, there is no way to fix up bad state in
 * a reasonable manner.
 *
 * The fact that Intel documents the hardware task-switching to
 * be slow is a fairly red herring - this code is not noticeably
 * faster. However, there _is_ some room for improvement here,
 * so the performance issues may eventually be a valid point.
 * More important, however, is the fact that this allows us much
 * more flexibility.
 *
 * The return value (in %ax) will be the "prev" task after
 * the task-switch, and shows up in ret_from_fork in entry.S,
 * for example.
 *
 * @prev_p: task being switched away from
 * @next_p: task being switched to
 *
 * Returns @prev_p.
 */
__notrace_funcgraph struct task_struct *
__switch_to(struct task_struct *prev_p, struct task_struct *next_p)
{
struct thread_struct *prev = &prev_p->thread,
*next = &next_p->thread;
int cpu = smp_processor_id();
struct tss_struct *tss = &per_cpu(init_tss, cpu);
fpu_switch_t fpu;
/* never put a printk in __switch_to... printk() calls wake_up*() indirectly */
/*
 * Start the FPU hand-over; the value returned here is passed to
 * switch_fpu_finish() near the end of the function.
 */
fpu = switch_fpu_prepare(prev_p, next_p, cpu);
/*
 * Reload esp0.
 */
load_sp0(tss, next);
/*
 * Save away %gs. No need to save %fs, as it was saved on the
 * stack on entry. No need to save %es and %ds, as those are
 * always kernel segments while inside the kernel. Doing this
 * before setting the new TLS descriptors avoids the situation
 * where we temporarily have non-reloadable segments in %fs
 * and %gs. This could be an issue if the NMI handler ever
 * used %fs or %gs (it does not today), or if the kernel is
 * running inside of a hypervisor layer.
 */
lazy_save_gs(prev->gs);
/*
 * Load the per-thread Thread-Local Storage descriptor.
 */
load_TLS(next, cpu);
/*
 * Restore IOPL if needed. In normal use, the flags restore
 * in the switch assembly will handle this. But if the kernel
 * is running virtualized at a non-zero CPL, the popf will
 * not restore flags, so it must be done in a separate step.
 */
if (get_kernel_rpl() && unlikely(prev->iopl != next->iopl))
set_iopl_mask(next->iopl);
/*
 * Now maybe handle debug registers and/or IO bitmaps
 * (only when one of the tasks has the corresponding
 * _TIF_WORK_CTXSW_* flag set).
 */
if (unlikely(task_thread_info(prev_p)->flags & _TIF_WORK_CTXSW_PREV ||
task_thread_info(next_p)->flags & _TIF_WORK_CTXSW_NEXT))
__switch_to_xtra(prev_p, next_p, tss);
/*
 * Leave lazy mode, flushing any hypercalls made here.
 * This must be done before restoring TLS segments so
 * the GDT and LDT are properly updated, and must be
 * done before math_state_restore, so the TS bit is up
 * to date.
 */
arch_end_context_switch(next_p);
/*
 * Restore %gs if needed (which is common)
 * The load is skipped only when both the old and the new
 * selector are zero.
 */
if (prev->gs | next->gs)
lazy_load_gs(next->gs);
/* Complete the FPU hand-over started by switch_fpu_prepare(). */
switch_fpu_finish(next_p, fpu);
/* Record next_p as this CPU's current task. */
percpu_write(current_task, next_p);
return prev_p;
}
/*
 * smp_callin - bring-up steps executed by the calling CPU.
 *
 * In order: refuses to continue (BUG) if this CPU is already marked online,
 * sets the NUMA node/memory node, marks the CPU online under the IPI call
 * lock and vector_lock, sets up the per-CPU timer and the CMC vector, enables
 * interrupts, optionally synchronizes the ITC with the time keeper, runs
 * delay calibration if this CPU differs from the previous one, and finally
 * sets its bit in cpu_callin_map.
 */
static void
smp_callin (void)
{
int cpuid, phys_id, itc_master;
struct cpuinfo_ia64 *last_cpuinfo, *this_cpuinfo;
extern void ia64_init_itm(void);
extern volatile int time_keeper_id;
#ifdef CONFIG_PERFMON
extern void pfm_init_percpu(void);
#endif
cpuid = smp_processor_id();
phys_id = hard_smp_processor_id();
itc_master = time_keeper_id;
/* A CPU calling in must not already be marked online. */
if (cpu_online(cpuid)) {
printk(KERN_ERR "huh, phys CPU#0x%x, CPU#0x%x already present??\n",
phys_id, cpuid);
BUG();
}
fix_b0_for_bsp();
/*
 * numa_node_id() works after this.
 */
set_numa_node(cpu_to_node_map[cpuid]);
set_numa_mem(local_memory_node(cpu_to_node_map[cpuid]));
/*
 * The vector setup, the CPU-starting notification and the transition to
 * online all happen with both the IPI call lock and vector_lock held.
 */
ipi_call_lock_irq();
spin_lock(&vector_lock);
/* Setup the per cpu irq handling data structures */
__setup_vector_irq(cpuid);
notify_cpu_starting(cpuid);
set_cpu_online(cpuid, true);
per_cpu(cpu_state, cpuid) = CPU_ONLINE;
spin_unlock(&vector_lock);
ipi_call_unlock_irq();
smp_setup_percpu_timer();
ia64_mca_cmc_vector_setup(); /* Setup vector on AP */
#ifdef CONFIG_PERFMON
pfm_init_percpu();
#endif
local_irq_enable();
/* ITC sync is skipped when the platform reports the ITC_DRIFT feature. */
if (!(sal_platform_features & IA64_SAL_PLATFORM_FEATURE_ITC_DRIFT)) {
/*
 * Synchronize the ITC with the BP. Need to do this after irqs are
 * enabled because ia64_sync_itc() calls smp_call_function_single(), which
 * calls spin_unlock_bh(), which calls local_bh_enable(), which bugs out
 * if irqs are not enabled...
 */
Dprintk("Going to syncup ITC with ITC Master.\n");
ia64_sync_itc(itc_master);
}
/*
 * Get our bogomips.
 */
ia64_init_itm();
/*
 * Delay calibration can be skipped if new processor is identical to the
 * previous processor.
 * NOTE(review): cpu_data(cpuid - 1) assumes cpuid >= 1, i.e. that this is
 * never run on CPU 0 - confirm against the callers.
 */
last_cpuinfo = cpu_data(cpuid - 1);
this_cpuinfo = local_cpu_data;
if (last_cpuinfo->itc_freq != this_cpuinfo->itc_freq ||
last_cpuinfo->proc_freq != this_cpuinfo->proc_freq ||
last_cpuinfo->features != this_cpuinfo->features ||
last_cpuinfo->revision != this_cpuinfo->revision ||
last_cpuinfo->family != this_cpuinfo->family ||
last_cpuinfo->archrev != this_cpuinfo->archrev ||
last_cpuinfo->model != this_cpuinfo->model)
calibrate_delay();
/* Store the global loops_per_jiffy value in this CPU's data. */
local_cpu_data->loops_per_jiffy = loops_per_jiffy;
/*
 * Allow the master to continue.
 */
cpu_set(cpuid, cpu_callin_map);
Dprintk("Stack on CPU %d at about %p\n",cpuid, &cpuid);
}
/*
 * Read the present CPU supply voltage.
 *
 * Looks up the cluster recorded for @cpu in physical_cluster and queries the
 * regulator stored for that cluster in cpu_vdd[].
 *
 * Return: the regulator_get_voltage() result divided by 1000 (mV, given a
 * result in uV). A negative result from regulator_get_voltage() is divided
 * by 1000 as well.
 */
static int sunxi_cpufreq_getvolt(unsigned int cpu)
{
	u32 cluster = per_cpu(physical_cluster, cpu);
	int microvolts = regulator_get_voltage(cpu_vdd[cluster]);

	return microvolts / 1000;
}
static void tick_nohz_stop_sched_tick(struct tick_sched *ts)
{
unsigned long seq, last_jiffies, next_jiffies, delta_jiffies;
unsigned long rcu_delta_jiffies;
ktime_t last_update, expires, now;
struct clock_event_device *dev = __get_cpu_var(tick_cpu_device).evtdev;
u64 time_delta;
int cpu;
cpu = smp_processor_id();
ts = &per_cpu(tick_cpu_sched, cpu);
now = tick_nohz_start_idle(cpu, ts);
/*
* If this cpu is offline and it is the one which updates
* jiffies, then give up the assignment and let it be taken by
* the cpu which runs the tick timer next. If we don't drop
* this here the jiffies might be stale and do_timer() never
* invoked.
*/
if (unlikely(!cpu_online(cpu))) {
if (cpu == tick_do_timer_cpu)
tick_do_timer_cpu = TICK_DO_TIMER_NONE;
return;
}
if (unlikely(ts->nohz_mode == NOHZ_MODE_INACTIVE))
return;
if (need_resched())
return;
if (unlikely(local_softirq_pending() && cpu_online(cpu))) {
static int ratelimit;
if (ratelimit < 10) {
printk(KERN_ERR "NOHZ: local_softirq_pending %02x\n",
(unsigned int) local_softirq_pending());
ratelimit++;
}
return;
}
ts->idle_calls++;
/* Read jiffies and the time when jiffies were updated last */
do {
seq = read_seqbegin(&xtime_lock);
last_update = last_jiffies_update;
last_jiffies = jiffies;
time_delta = timekeeping_max_deferment();
} while (read_seqretry(&xtime_lock, seq));
if (rcu_needs_cpu(cpu, &rcu_delta_jiffies) || printk_needs_cpu(cpu) ||
arch_needs_cpu(cpu)) {
next_jiffies = last_jiffies + 1;
delta_jiffies = 1;
} else {
/* Get the next timer wheel timer */
next_jiffies = get_next_timer_interrupt(last_jiffies);
delta_jiffies = next_jiffies - last_jiffies;
if (rcu_delta_jiffies < delta_jiffies) {
next_jiffies = last_jiffies + rcu_delta_jiffies;
delta_jiffies = rcu_delta_jiffies;
}
}
/*
* Do not stop the tick, if we are only one off
* or if the cpu is required for rcu
*/
if (!ts->tick_stopped && delta_jiffies == 1)
goto out;
/* Schedule the tick, if we are at least one jiffie off */
if ((long)delta_jiffies >= 1) {
/*
* If this cpu is the one which updates jiffies, then
* give up the assignment and let it be taken by the
* cpu which runs the tick timer next, which might be
* this cpu as well. If we don't drop this here the
* jiffies might be stale and do_timer() never
* invoked. Keep track of the fact that it was the one
* which had the do_timer() duty last. If this cpu is
* the one which had the do_timer() duty last, we
* limit the sleep time to the timekeeping
* max_deferement value which we retrieved
* above. Otherwise we can sleep as long as we want.
*/
if (cpu == tick_do_timer_cpu) {
tick_do_timer_cpu = TICK_DO_TIMER_NONE;
ts->do_timer_last = 1;
} else if (tick_do_timer_cpu != TICK_DO_TIMER_NONE) {
time_delta = KTIME_MAX;
ts->do_timer_last = 0;
} else if (!ts->do_timer_last) {
time_delta = KTIME_MAX;
}
/*
//......... part of the code is omitted here .........
Please leave a comment