do_fork-->copy_process(二)

发布时间 2024-01-05 19:14:28作者: gaozy6626

 

  1 /*
  2  * This creates a new process as a copy of the old one,
  3  * but does not actually start it yet.
  4  *
  5  * It copies the registers, and all the appropriate
  6  * parts of the process environment (as per the clone
  7  * flags). The actual kick-off is left to the caller.
  8  */
  9 struct task_struct *copy_process(unsigned long clone_flags, /* CLONE_* bits; the CSIGNAL bits become the exit signal */
 10                  unsigned long stack_start, /* passed through to copy_thread() */
 11                  struct pt_regs *regs, /* passed through to copy_thread() */
 12                  unsigned long stack_size, /* passed through to copy_thread() */
 13                  int __user *parent_tidptr, /* user address that receives the pid when CLONE_PARENT_SETTID is set */
 14                  int __user *child_tidptr) /* stored as set_child_tid / clear_child_tid depending on clone_flags */
 15 {
 16     int retval; /* errno-style status; a nonzero value is returned through ERR_PTR() at fork_out */
 17     struct task_struct *p = NULL; /* the new task; stays NULL until dup_task_struct() succeeds */
 18 
 19     if ((clone_flags & (CLONE_NEWNS|CLONE_FS)) == (CLONE_NEWNS|CLONE_FS)) /* both flags together are rejected */
 20         return ERR_PTR(-EINVAL);
 21 
 22     /*
 23      * Thread groups must share signals as well, and detached threads
 24      * can only be started up within the thread group.
 25      */
 26     if ((clone_flags & CLONE_THREAD) && !(clone_flags & CLONE_SIGHAND))
 27         return ERR_PTR(-EINVAL);
 28 
 29     /*
 30      * Shared signal handlers imply shared VM. By way of the above,
 31      * thread groups also imply shared VM. Blocking this case allows
 32      * for various simplifications in other code.
 33      */
 34     if ((clone_flags & CLONE_SIGHAND) && !(clone_flags & CLONE_VM))
 35         return ERR_PTR(-EINVAL);
 36 
 37     /*
 38      * CLONE_DETACHED must match CLONE_THREAD: it's a historical
 39      * thing.
 40      */
 41     if (!(clone_flags & CLONE_DETACHED) != !(clone_flags & CLONE_THREAD)) { /* true when exactly one of the two flags is set */
 42         /* Warn about the old no longer supported case so that we see it */
 43         if (clone_flags & CLONE_THREAD) {
 44             static int count; /* limits the warning to five messages in total */
 45             if (count < 5) {
 46                 count++;
 47                 printk(KERN_WARNING "%s trying to use CLONE_THREAD without CLONE_DETACH\n", current->comm);
 48             }
 49         }
 50         return ERR_PTR(-EINVAL);
 51     }
 52 
 53     retval = security_task_create(clone_flags); /* a nonzero result aborts the fork with that value */
 54     if (retval)
 55         goto fork_out;
 56 
 57     retval = -ENOMEM; /* error code used if dup_task_struct() returns NULL */
 58     p = dup_task_struct(current);
 59     if (!p)
 60         goto fork_out;
 61 
 62     retval = -EAGAIN; /* error code for the limit, module and pid checks below */
 63     if (atomic_read(&p->user->processes) >= /* user is at or over its RLIMIT_NPROC soft limit */
 64             p->rlim[RLIMIT_NPROC].rlim_cur) {
 65         if (!capable(CAP_SYS_ADMIN) && !capable(CAP_SYS_RESOURCE) && /* refuse only if neither capability is held ... */
 66                 p->user != &root_user) /* ... and the user is not root_user */
 67             goto bad_fork_free;
 68     }
 69 
 70     atomic_inc(&p->user->__count); /* presumably the reference released by free_uid() in the error path - TODO confirm */
 71     atomic_inc(&p->user->processes); /* undone by atomic_dec() at bad_fork_cleanup_count */
 72 
 73     /*
 74      * If multiple threads are within copy_process(), then this check
 75      * triggers too late. This doesn't hurt, the check is only there
 76      * to stop root fork bombs.
 77      */
 78     if (nr_threads >= max_threads)
 79         goto bad_fork_cleanup_count;
 80 
 81     if (!try_module_get(p->thread_info->exec_domain->module)) /* released by module_put() at bad_fork_cleanup_put_domain */
 82         goto bad_fork_cleanup_count;
 83 
 84     if (p->binfmt && !try_module_get(p->binfmt->module)) /* released by module_put() at bad_fork_cleanup */
 85         goto bad_fork_cleanup_put_domain;
 86 
 87 #ifdef CONFIG_PREEMPT
 88     /*
 89      * schedule_tail drops this_rq()->lock so we compensate with a count
 90      * of 1.  Also, we want to start with kernel preemption disabled.
 91      */
 92     p->thread_info->preempt_count = 1;
 93 #endif
 94     p->did_exec = 0;
 95     p->state = TASK_UNINTERRUPTIBLE; /* starting the task is left to the caller (see header comment) */
 96 
 97     copy_flags(clone_flags, p);
 98     if (clone_flags & CLONE_IDLETASK)
 99         p->pid = 0; /* idle task: no pid is allocated */
100     else {
101         p->pid = alloc_pidmap();
102         if (p->pid == -1) /* allocation failed; retval is still -EAGAIN */
103             goto bad_fork_cleanup;
104     }
105     retval = -EFAULT; /* error code if writing the pid to user space fails */
106     if (clone_flags & CLONE_PARENT_SETTID)
107         if (put_user(p->pid, parent_tidptr))
108             goto bad_fork_cleanup;
109 
110     p->proc_dentry = NULL;
111 
112     INIT_LIST_HEAD(&p->run_list); /* from here on: reset per-task state so it is not inherited from the parent copy */
113 
114     INIT_LIST_HEAD(&p->children);
115     INIT_LIST_HEAD(&p->sibling);
116     INIT_LIST_HEAD(&p->posix_timers);
117     init_waitqueue_head(&p->wait_chldexit);
118     p->vfork_done = NULL;
119     spin_lock_init(&p->alloc_lock);
120     spin_lock_init(&p->switch_lock);
121     spin_lock_init(&p->proc_lock);
122 
123     clear_tsk_thread_flag(p, TIF_SIGPENDING); /* the child starts with an empty pending-signal set */
124     init_sigpending(&p->pending);
125 
126     p->it_real_value = p->it_virt_value = p->it_prof_value = 0; /* interval timer values and increments start at zero */
127     p->it_real_incr = p->it_virt_incr = p->it_prof_incr = 0;
128     init_timer(&p->real_timer);
129     p->real_timer.data = (unsigned long) p; /* timer payload is the task pointer itself */
130 
131     p->leader = 0;        /* session leadership doesn't inherit */
132     p->tty_old_pgrp = 0;
133     p->utime = p->stime = 0; /* CPU time accounting restarts from zero */
134     p->cutime = p->cstime = 0;
135     p->array = NULL;
136     p->lock_depth = -1;        /* -1 = no lock */
137     p->start_time = get_jiffies_64();
138     p->security = NULL;
139     p->io_context = NULL;
140 
141     retval = -ENOMEM; /* NOTE(review): overwritten by the assignment in the condition on the next line */
142     if ((retval = security_task_alloc(p)))
143         goto bad_fork_cleanup;
144     /* copy all the process information */
145     if ((retval = copy_semundo(clone_flags, p))) /* each failure below jumps to the label that undoes the steps already done */
146         goto bad_fork_cleanup_security;
147     if ((retval = copy_files(clone_flags, p)))
148         goto bad_fork_cleanup_semundo;
149     if ((retval = copy_fs(clone_flags, p)))
150         goto bad_fork_cleanup_files;
151     if ((retval = copy_sighand(clone_flags, p)))
152         goto bad_fork_cleanup_fs;
153     if ((retval = copy_signal(clone_flags, p)))
154         goto bad_fork_cleanup_sighand;
155     if ((retval = copy_mm(clone_flags, p)))
156         goto bad_fork_cleanup_signal;
157     if ((retval = copy_namespace(clone_flags, p)))
158         goto bad_fork_cleanup_mm;
159     retval = copy_thread(0, clone_flags, stack_start, stack_size, p, regs);
160     if (retval)
161         goto bad_fork_cleanup_namespace;
162 
163     p->set_child_tid = (clone_flags & CLONE_CHILD_SETTID) ? child_tidptr : NULL; /* user pointer kept only when the flag is set */
164     /*
165      * Clear TID on mm_release()?
166      */
167     p->clear_child_tid = (clone_flags & CLONE_CHILD_CLEARTID) ? child_tidptr: NULL;
168 
169     /*
170      * Syscall tracing should be turned off in the child regardless
171      * of CLONE_PTRACE.
172      */
173     clear_tsk_thread_flag(p, TIF_SYSCALL_TRACE);
174 
175     /* Our parent execution domain becomes current domain
176        These must match for thread signalling to apply */
177        
178     p->parent_exec_id = p->self_exec_id;
179 
180     /* ok, now we should be set up.. */
181     p->exit_signal = (clone_flags & CLONE_THREAD) ? -1 : (clone_flags & CSIGNAL); /* -1 for threads, otherwise the CSIGNAL bits of clone_flags */
182     p->pdeath_signal = 0;
183 
184     /*
185      * Share the timeslice between parent and child, thus the
186      * total amount of pending timeslices in the system doesn't change,
187      * resulting in more scheduling fairness.
188      */
189     local_irq_disable(); /* interrupts stay off while both time slices are adjusted */
190         p->time_slice = (current->time_slice + 1) >> 1; /* child gets half of the parent's slice, rounded up */
191     /*
192      * The remainder of the first timeslice might be recovered by
193      * the parent if the child exits early enough.
194      */
195     p->first_time_slice = 1;
196     current->time_slice >>= 1; /* parent keeps the other half, rounded down */
197     p->timestamp = sched_clock();
198     if (!current->time_slice) {
199         /*
200           * This case is rare, it happens when the parent has only
201           * a single jiffy left from its timeslice. Taking the
202          * runqueue lock is not a problem.
203          */
204         current->time_slice = 1; /* halving reached zero: put one tick back before calling scheduler_tick() */
205         preempt_disable();
206         scheduler_tick(0, 0);
207         local_irq_enable();
208         preempt_enable();
209     } else
210         local_irq_enable();
211     /*
212      * Ok, add it to the run-queues and make it
213      * visible to the rest of the system.
214      *
215      * Let it rip!
216      */
217     p->tgid = p->pid; /* defaults: the task leads its own thread group; overridden below for CLONE_THREAD */
218     p->group_leader = p;
219     INIT_LIST_HEAD(&p->ptrace_children);
220     INIT_LIST_HEAD(&p->ptrace_list);
221 
222     /* Need tasklist lock for parent etc handling! */
223     write_lock_irq(&tasklist_lock);
224     /*
225      * Check for pending SIGKILL! The new thread should not be allowed
226      * to slip out of an OOM kill. (or normal SIGKILL.)
227      */
228     if (sigismember(&current->pending.signal, SIGKILL)) {
229         write_unlock_irq(&tasklist_lock); /* drop the lock before unwinding */
230         retval = -EINTR;
231         goto bad_fork_cleanup_namespace; /* undoes every copy_*() step performed above */
232     }
233 
234     /* CLONE_PARENT re-uses the old parent */
235     if (clone_flags & CLONE_PARENT)
236         p->real_parent = current->real_parent;
237     else
238         p->real_parent = current;
239     p->parent = p->real_parent; /* parent starts out equal to real_parent */
240 
241     if (clone_flags & CLONE_THREAD) {
242         spin_lock(&current->sighand->siglock); /* taken while tasklist_lock is already held */
243         /*
244          * Important: if an exit-all has been started then
245          * do not create this new thread - the whole thread
246          * group is supposed to exit anyway.
247          */
248         if (current->signal->group_exit) {
249             spin_unlock(&current->sighand->siglock); /* release both locks in reverse order before unwinding */
250             write_unlock_irq(&tasklist_lock);
251             retval = -EAGAIN;
252             goto bad_fork_cleanup_namespace;
253         }
254         p->tgid = current->tgid; /* join the creator's thread group */
255         p->group_leader = current->group_leader;
256 
257         if (current->signal->group_stop_count > 0) {
258             /*
259              * There is an all-stop in progress for the group.
260              * We ourselves will stop as soon as we check signals.
261              * Make the new thread part of that group stop too.
262              */
263             current->signal->group_stop_count++; /* count the new thread in the pending group stop */
264             set_tsk_thread_flag(p, TIF_SIGPENDING);
265         }
266 
267         spin_unlock(&current->sighand->siglock);
268     }
269 
270     SET_LINKS(p);
271     if (p->ptrace & PT_PTRACED)
272         __ptrace_link(p, current->parent);
273 
274     attach_pid(p, PIDTYPE_PID, p->pid);
275     if (thread_group_leader(p)) { /* leaders are attached under TGID, PGID and SID as well */
276         attach_pid(p, PIDTYPE_TGID, p->tgid);
277         attach_pid(p, PIDTYPE_PGID, process_group(p));
278         attach_pid(p, PIDTYPE_SID, p->session);
279         if (p->pid)
280             __get_cpu_var(process_counts)++; /* only thread-group leaders with a nonzero pid are counted */
281     } else
282         link_pid(p, p->pids + PIDTYPE_TGID, &p->group_leader->pids[PIDTYPE_TGID].pid); /* non-leaders link to the leader's TGID entry */
283 
284     nr_threads++;
285     write_unlock_irq(&tasklist_lock);
286     retval = 0; /* success: fork_out returns p */
287 
288 fork_out: /* common exit: ERR_PTR(retval) on failure, p on success */
289     if (retval)
290         return ERR_PTR(retval);
291     return p;
292 
293 bad_fork_cleanup_namespace: /* error unwinding: each label undoes one setup step and falls through to the earlier ones */
294     exit_namespace(p);
295 bad_fork_cleanup_mm:
296     exit_mm(p);
297 bad_fork_cleanup_signal:
298     exit_signal(p);
299 bad_fork_cleanup_sighand:
300     exit_sighand(p);
301 bad_fork_cleanup_fs:
302     exit_fs(p); /* blocking */
303 bad_fork_cleanup_files:
304     exit_files(p); /* blocking */
305 bad_fork_cleanup_semundo:
306     exit_sem(p);
307 bad_fork_cleanup_security:
308     security_task_free(p);
309 bad_fork_cleanup:
310     if (p->pid > 0) /* pid 0 (idle task) and -1 (failed allocation) have nothing to free */
311         free_pidmap(p->pid);
312     if (p->binfmt)
313         module_put(p->binfmt->module);
314 bad_fork_cleanup_put_domain:
315     module_put(p->thread_info->exec_domain->module);
316 bad_fork_cleanup_count:
317     atomic_dec(&p->user->processes);
318     free_uid(p->user);
319 bad_fork_free:
320     free_task(p);
321     goto fork_out; /* retval is nonzero on every path that gets here, so fork_out returns ERR_PTR(retval) */
322 }

 

17行:struct task_struct 结构体包含了进程相关的所有属性和信息(也叫进程控制块, Process Control Block, PCB)。包含:进程属性相关信息,进程间关系,进程调度信息,内存管理信息,文件管理信息,信号处理相关信息,资源限制相关信息。

19,26,34,41行:检查flags标记位, clone_flags 是在调用do_fork时的入参,不同的函数调用,参数不同。(通常对应的是不同的系统调用,fork,vfork)

53行:安全性检查,询问 Linux Security Module(LSM)看当前任务是否可以创建一个新任务。

58行:为进程分配物理页面。其中调用 (alloc_task_struct,alloc_thread_info再调用__get_free_pages申请物理页面)

63,64行:检查进程资源限制。user指针指向user_struct 结构体,一个用户通常有多个进程,这些进程共享同一个结构体。rlim指向资源限制结构体。

97行:复制flags, CLONE_IDLETASK代表0号进程。如果不是0号进程,申请pid。

101行:pid循环使用,使用pid位图来管理。默认pid最大值是32767,在64位系统中,用户可以通过写/proc/sys/kernel/pid_max文件,扩展到4194303。

142行:LSM Linux安全模块(后续学习)

145行:复制IPC信息。通过get_undo_list申请IPC结构体内存空间(是一个链表,并将链表放入undo_list中,将支持ipc的进程链接到一起。不支持ipc则设置为NULL.)

147行:复制已打开文件的控制结构,只有在CLONE_FILES标记位为0时才进行,否则共享父进程的结构。共享和复制的区别在于,如果是共享,子进程对文件操作会影响到父进程(比如lseek())。

149行:复制进程目录,权限等信息。(与copy_files() 类似)

151,153行:复制信号相关的数据。

155行:复制内存相关的数据。(内存相关的比较复杂,后续需要深究。)

157行:复制命名空间。(参考:https://cloud.tencent.com/developer/article/2129136)

159行:拷贝进程堆栈。

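163,167行:set_child_tid 和 clear_child_tid 的值都来自入参 child_tidptr(用户空间地址)。设置了 CLONE_CHILD_SETTID 时,set_child_tid 指向该地址,当新进程执行时,将该进程的 pid 写入该地址;设置了 CLONE_CHILD_CLEARTID 时,clear_child_tid 指向该地址,在 mm_release() 时清除其中的 TID。未设置对应标记位时,两者均为 NULL。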

178行:parent_exec_id 是父进程的执行域, self_exec_id 是本进程的执行域。

181,182行:exit_signal 是当前进程退出时向父进程发出的信号,pdeath_signal是父进程退出时,向子进程发出的信号。

190,196行:time_slice 是时间片。将当前进程的时间片分成两份,一份给当前进程,一份给子进程。

197行:获取进程时间戳。

217-220行:将进程链接到一起,加入到进程队列中。

235-239行:设置父进程,考虑到被调试的情况,需要parent 和 real_parent。

270行:将子进程的task_struct 链入到内核的进程队列中。

274-282行:处理进程关系(还没搞清楚)